# get_train_annot_YOLO.py
# Author: Stephane Chavin
import os
import pandas as pd
import librosa
import numpy as np
import matplotlib.pyplot as plt
from p_tqdm import p_map
import ipdb
import random
from datetime import date
import argparse
import matplotlib.patches as patches
from matplotlib.patches import Rectangle
from PIL import Image
from mycolorpy import colorlist as mcp
# Date of the run; process() embeds its day and month in the names of the
# output folders (images_<day>_<month>, labels_<day>_<month>, ...).
today = date.today()
def arg_directory(path):
    """Argparse ``type`` callback that only accepts existing directories.

    Returns ``path`` unchanged when it designates a directory; otherwise
    raises ``argparse.ArgumentTypeError`` carrying the rejected value.
    """
    if not os.path.isdir(path):
        raise argparse.ArgumentTypeError(f'`{path}` is not a valid path')
    return path
# Command-line interface. Every option except --export is mandatory.
parser = argparse.ArgumentParser(
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    description='Generate spectrogram images and YOLO-format label files from an annotation table',
)
parser.add_argument('-f', '--filename_path', type=str, required=True,
                    help='Path and name of the file containing the annotations')
# NOTE(review): --path_to_data is validated but never read in the visible code.
parser.add_argument('-p', '--path_to_data', type=arg_directory, required=True,
                    help='Path of the folder that contains the recordings')
parser.add_argument('-d', '--directory', type=arg_directory, required=True,
                    help='Directory in which spectrograms and .txt files will be stored')
parser.add_argument('-m', '--mode', type=str, choices=['uniform', 'personalized'], required=True,
                    help='Choose the mode to calculate the y and height value')
parser.add_argument('-u', '--unique', type=str, choices=['unique', 'multiple'], required=True,
                    help='unique for only one spectrogram per file, multiple for multiple spectrograms')
parser.add_argument('-c', '--columns_name', type=str, required=True,
                    help='Name of the column that contains the path')
# Any value given to --export enables the annotated-image export; only
# "is it None" is tested later on.
parser.add_argument('--export', type=str, default=None, required=False,
                    help='To export the position of the bounding box on the spectrogram')
args = parser.parse_args()

# Root output folder (checked to be an existing directory by arg_directory).
directory = args.directory
# Length, in seconds, of the audio excerpt rendered in each spectrogram.
DURATION = 8
# Only the NB_CLASS most frequent labels are kept.
NB_CLASS = 5

df = pd.read_csv(args.filename_path, low_memory=False)

# Accept the alternative column names and map them onto the names used by the
# rest of the script. Columns that are absent are simply ignored by rename().
df.rename(
    columns={
        'label': 'Code',
        'annotation_initial_time': 'start',
        'annotation_final_time': 'stop',
        'duree': 'd_annot',
        'min_frequency': 'min_freq',
        'max_frequency': 'max_freq',
        'avg_frequency': 'midl_y',
    },
    inplace=True,
)

# Keep the NB_CLASS labels with the most rows (counted on the first column).
tab = df.groupby('Code').count()
tab = tab.sort_values(tab.columns[0], ascending=False)[:NB_CLASS]
# .copy() so the column assignments below act on an independent frame and not
# on a slice of the frame read from disk.
df = df[df.Code.isin(tab.index)].copy()

# Frequency bounds: fill the gaps when both columns exist, otherwise fall back
# to one fixed band for every annotation (values presumably in Hz - confirm).
# Assigning the result of fillna() back (instead of a chained inplace call)
# keeps this working when pandas Copy-on-Write is active.
if {'max_freq', 'min_freq'}.issubset(df.columns):
    df['max_freq'] = df['max_freq'].fillna(9000)
    df['min_freq'] = df['min_freq'].fillna(1000)
else:
    df['max_freq'] = 9000
    df['min_freq'] = 1000
    df['midl_y'] = 5000

df['d_annot'] = df.stop - df.start          # annotation length
df['midl'] = (df.stop + df.start) / 2       # annotation midpoint in time
df['Path'] = df[args.columns_name]          # recording each annotation belongs to

# Drop annotations that are not shorter than one spectrogram window.
df = df[df.d_annot < DURATION]
df = df.reset_index()

# Remaining labels ordered by decreasing number of annotations; the rank of a
# label in this list is the class id written in the label files.
list_espece = df.groupby('Code').count().sort_values(df.columns[0], ascending=False)

# process() reads the class id through a label-based `[0]` lookup on a filtered
# column, which requires every row of `data` to carry the index label 0.
data = pd.DataFrame(
    {'espece': list(list_espece.index), 'ind': list(range(len(list_espece)))},
    index=[0] * len(list_espece),
)
liste_espece = data.espece
# os.path.join: the file lands inside `directory` whether or not the path was
# given with a trailing separator.
liste_espece.to_csv(os.path.join(directory, 'liste_especes.csv'), index=False)
print('\n', data)

# One colour per class, used when drawing the exported bounding boxes.
color = mcp.gen_color(cmap="Wistia", n=len(list_espece))
def process(x):
    """Render the spectrogram(s) of one recording and write the matching labels.

    Parameters
    ----------
    x : tuple
        ``(count, (f, grp))`` as yielded by ``enumerate(df.groupby('Path'))``:
        ``count`` is the position of the group, ``f`` the path of the
        recording and ``grp`` the annotation rows attached to it.

    The function returns nothing. For every window of annotations it saves the
    spectrogram under ``images_<day>_<month>/<label>/`` and
    ``images_<day>_<month>/all/``, writes one space-separated line
    ``id x y width height`` per annotation under ``labels_<day>_<month>/`` and,
    when ``--export`` was given, saves a copy with the boxes drawn under
    ``images_annotes_<day>_<month>/``.

    It relies on the module-level ``args``, ``DURATION``, ``directory``,
    ``today``, ``data``, ``list_espece`` and ``color``.
    """
    count, (f, grp) = x
    filename = str(f)
    duration = DURATION

    stamp = str(today.day) + '_' + str(today.month)
    images_dir = os.path.join(directory, 'images_' + stamp)
    labels_dir = os.path.join(directory, 'labels_' + stamp)
    export_dir = os.path.join(directory, 'images_annotes_' + stamp)

    # Create the output tree up front. exist_ok makes this safe when the
    # folders already exist (fully or partly) and when several workers reach
    # this point at the same time; genuine save errors are no longer mistaken
    # for "missing directory".
    for especes in list_espece.index:
        os.makedirs(os.path.join(images_dir, especes), exist_ok=True)
    os.makedirs(os.path.join(images_dir, 'all'), exist_ok=True)
    os.makedirs(labels_dir, exist_ok=True)
    if args.export is not None:
        os.makedirs(export_dir, exist_ok=True)

    while len(grp) != 0:
        # Annotations whose midpoint lies at most 7 s after the start of the
        # first remaining annotation are handled together.
        tab = grp[grp.midl <= grp.start.iloc[0] + 7]
        if len(tab) == 0:
            tab = grp
            print(tab)

        # Random shift so the first annotation is not always at the same
        # horizontal position in the image.
        rd = round(random.uniform(-1.5, 1.5), 2)
        if args.unique == 'multiple':
            if tab.start.iloc[0] < 3:
                offset = 0
            else:
                # Place the first annotation around 3.5 s into the window; the
                # result is clamped because it can be negative (down to -2 s
                # when the annotation starts at 3 s) and audio cannot be read
                # from before the beginning of the file.
                offset = max(0, tab.midl.iloc[0] - 3.5 + rd)
        elif args.unique == 'unique':
            offset = 0

        window_size = 1024
        window = np.hanning(window_size)
        y, sr = librosa.load(filename, offset=offset, duration=duration, sr=None)
        stft = librosa.core.spectrum.stft(y, n_fft=window_size, hop_length=512, window=window)

        # specgram is only called for its frequency / time bin vectors, whose
        # lengths scale the exported boxes; its figure is discarded.
        _, freq, time, _ = plt.specgram(y, Fs=sr, NFFT=window_size, noverlap=512, cmap='jet')
        plt.close()

        # Log-magnitude image, flipped so the lowest frequencies are at the
        # bottom; the colour scale runs from the mean to the maximum.
        log_spec = np.flipud(np.log10(np.abs(stft)))
        vmin = log_spec.mean()
        vmax = log_spec.max()
        plt.imshow(log_spec, aspect="auto", interpolation=None, cmap='jet', vmin=vmin, vmax=vmax)
        plt.subplots_adjust(top=1, bottom=0, left=0, right=1)

        rows = []
        for idxs, row in tab.iterrows():
            # Box centre and width as fractions of the window duration.
            x_pxl = (row.midl - offset) / duration
            width_pxl = (row.stop - row.start) / duration
            if args.mode == 'uniform':
                height_pxl = 0.8
                y_pxl = 0.5
            else:
                # Fractions of the Nyquist frequency, measured from the top.
                y_pxl = 1 - (row.midl_y / (sr / 2))
                height_pxl = (row.max_freq - row.min_freq) / (sr / 2)
                if height_pxl > 1:
                    height_pxl = 1
                elif height_pxl > y_pxl * 2:
                    # The box would stick out above the image: move its centre
                    # down to height/2 so the top edge sits on the border.
                    y_pxl = y_pxl + 0.5 * (height_pxl - y_pxl * 2)
            class_id = str(data.loc[data.espece == row.Code, 'ind'].iloc[0])
            rows.append([class_id, x_pxl, y_pxl, width_pxl, height_pxl])
        # object dtype: values are stored as computed, without dtype coercion.
        fin = pd.DataFrame(rows, columns=['id', 'x', 'y', 'width', 'height'], dtype=object)

        grp = grp.drop(tab.index)

        # `row` is the last annotation of the window: its label selects the
        # per-label image folder.
        # NOTE(review): the name only depends on the recording and on `count`,
        # so successive windows of the same recording overwrite each other -
        # confirm whether a per-window suffix is wanted in 'multiple' mode.
        name = str(row.Path.replace('/', '_').replace('.', '_') + '_' + str(count))
        name_file = os.path.join(labels_dir, name + '.txt')

        plt.savefig(os.path.join(images_dir, row.Code, name + '.jpg'))
        fin.to_csv(name_file, sep=' ', header=False, index=False)
        plt.savefig(os.path.join(images_dir, 'all', name + '.jpg'))

        if args.export is not None:
            n_time = len(time)
            n_freq = len(freq)
            axes = plt.gca()
            for box in fin.itertuples(index=False):
                try:
                    edge = color[int(box.id)]
                except IndexError:
                    # Report and carry on: a debugger prompt inside a pool
                    # worker would block the whole run.
                    print(f'No colour defined for class id {box.id}; bounding box not drawn on {name}')
                    continue
                box_w = box.width * n_time
                box_h = box.height * n_freq
                # Rectangle spans [xy, xy + size] in data coordinates, so the
                # anchor is the centre minus half the size on both axes.
                axes.add_patch(Rectangle(
                    (box.x * n_time - 0.5 * box_w, box.y * n_freq - 0.5 * box_h),
                    box_w, box_h,
                    linewidth=3, edgecolor=edge, facecolor='none'))
            plt.savefig(os.path.join(export_dir, name + '.jpg'))
        plt.close()
# Guarded so that worker processes which re-import this module (spawn start
# method) do not start a pool of their own.
if __name__ == '__main__':
    # One task per recording: (position, (path, annotations of that path)).
    groups = df.groupby('Path')
    p_map(process, enumerate(groups), num_cpus=2, total=len(groups))
    print('saved to ', directory)