# get_train_annot_YOLO.py

import os
import pandas as pd
import librosa
import numpy as np
import matplotlib.pyplot as plt
from p_tqdm import p_map
import ipdb
import random
from datetime import date
import argparse
import cv2
import matplotlib.patches as patches
from matplotlib.patches import Rectangle
from random import randrange
from PIL import Image
from mycolorpy import colorlist as mcp

# Run date, captured once at start-up; its day and month are used to build the
# names of the output folders.
today = date.today()

def arg_directory(path):
    """Validate a command-line value that must name an existing directory.

    Intended as an argparse ``type=`` callable.

    Args:
        path: The raw string received from the command line.

    Returns:
        ``path`` unchanged when it points to an existing directory.

    Raises:
        argparse.ArgumentTypeError: If ``path`` is not an existing directory.
    """
    if not os.path.isdir(path):
        raise argparse.ArgumentTypeError(f'`{path}` is not a valid path')
    return path

# Command-line interface. All options are mandatory except --export.
parser = argparse.ArgumentParser(
    description='TODO',
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
# CSV file holding the annotations.
parser.add_argument(
    '-f', '--filename_path',
    required=True,
    type=str,
    help='Path and name of the file containing the annotations',
)
# Folder holding the recordings (must already exist).
parser.add_argument(
    '-p', '--path_to_data',
    required=True,
    type=arg_directory,
    help='Path of the folder that contain the recordings',
)
# Output folder (must already exist).
parser.add_argument(
    '-d', '--directory',
    required=True,
    type=arg_directory,
    help='Directory to wich spectrograms and .txt files will be stored',
)
# How the vertical position and height of each box are computed.
parser.add_argument(
    '-m', '--mode',
    required=True,
    type=str,
    choices=['uniform', 'personalized'],
    help='Choose the mode to calculate the y and height value',
)
# One spectrogram per recording, or several.
parser.add_argument(
    '-u', '--unique',
    required=True,
    type=str,
    choices=['unique', 'multiple'],
    help='unique for only one spectrogram per file, multple for multiple spectrogram',
)
# Name of the annotation column that holds the recording path.
parser.add_argument(
    '-c', '--columns_name',
    required=True,
    type=str,
    help='Name of the column that contain the path',
)
# Any value given here switches on the export of annotated images.
parser.add_argument(
    '--export',
    required=False,
    type=str,
    default=None,
    help='To export the position of the bounding box on the spectrogram',
)
args = parser.parse_args()

directory = args.directory

# Length, in seconds, of the audio excerpt rendered in each spectrogram.
DURATION = 8
# Only the NB_CLASS most frequently annotated labels are kept.
NB_CLASS = 5

df = pd.read_csv(args.filename_path, low_memory=False)

# Map the column names of the annotation file onto the names used in this
# script. `rename` silently ignores names that are absent from the file.
df.rename(
    columns={
        'label': 'Code',
        'annotation_initial_time': 'start',
        'annotation_final_time': 'stop',
        'duree': 'd_annot',
        'min_frequency': 'min_freq',
        'max_frequency': 'max_freq',
        'avg_frequency': 'midl_y',
    },
    inplace=True,
)

# Keep the NB_CLASS labels with the highest counts.
tab = df.groupby('Code').count()
tab = tab.sort_values(tab.columns[0], ascending=False)[:NB_CLASS]
# .copy() gives an independent frame, so the column assignments below do not
# act on a slice of the full annotation table.
df = df[df.Code.isin(tab.index)].copy()

if 'max_freq' in df.columns and 'min_freq' in df.columns:
    # Assign the filled column back: an in-place fillna on `df[col]` is a
    # chained operation that leaves `df` untouched under pandas Copy-on-Write.
    df['max_freq'] = df['max_freq'].fillna(9000)
    df['min_freq'] = df['min_freq'].fillna(1000)
    # A missing centre frequency falls back to the middle of the band.
    band_centre = (df['min_freq'] + df['max_freq']) / 2
    if 'midl_y' in df.columns:
        df['midl_y'] = df['midl_y'].fillna(band_centre)
    else:
        df['midl_y'] = band_centre
else:
    # No usable frequency bounds in the file: use one default band everywhere.
    df['max_freq'] = 9000
    df['min_freq'] = 1000
    df['midl_y'] = 5000

df['d_annot'] = df.stop - df.start
df['midl'] = (df.stop + df.start) / 2
df['Path'] = df[args.columns_name]

# Annotations lasting DURATION seconds or more are dropped.
df = df[df.d_annot < DURATION]
# reset_index() also inserts the old index as the first column; that column is
# what the counts below are sorted on.
df = df.reset_index()

# Labels ordered by decreasing count; a label's rank becomes its class id.
list_espece = df.groupby('Code').count().sort_values(df.columns[0], ascending=False)

# Table label -> class id. Every row carries the index label 0, exactly as when
# the table was assembled by concatenating one-row frames: label-based lookups
# such as `series[0]` on a single filtered row depend on it.
data = pd.DataFrame(
    {
        'espece': list(list_espece.index),
        'ind': list(range(len(list_espece))),
    },
    index=[0] * len(list_espece),
    columns=['espece', 'ind'],
)

liste_espece = data.espece
# os.path.join puts the file inside `directory` whether or not the path was
# given with a trailing separator.
liste_espece.to_csv(os.path.join(directory, 'liste_especes.csv'), index=False)

print('\n', data)

#color = mcp.gen_color(cmap = "Wistia", n= len(list_espece))

# Random RGB colour for each of 30 class ids (column `species` holds 0..29,
# column `color` holds an (r, g, b) tuple). Built in one go instead of
# concatenating one-row frames onto an empty frame; channels use
# randrange(256) so that the value 255 can be drawn too.
colors = pd.DataFrame(
    {
        'color': [
            (randrange(256), randrange(256), randrange(256))
            for _ in range(30)
        ],
        'species': list(range(30)),
    },
    columns=['color', 'species'],
)


def _resolve_audio_path(filename):
    """Return the path from which the recording `filename` is loaded.

    The path is used as given when it names an existing file. Otherwise it is
    looked up inside the folder given with --path_to_data. When neither exists
    the original value is returned, so the loader reports the missing file.
    """
    if os.path.isfile(filename):
        return filename
    candidate = os.path.join(args.path_to_data, filename)
    return candidate if os.path.isfile(candidate) else filename


def _window_offset(first_start, first_midl):
    """Return the start time (s) of the audio excerpt for one spectrogram.

    In 'unique' mode, or when the first annotation starts before 3 s, the
    excerpt starts at 0. Otherwise it is placed so that the centre of the
    first annotation lies 3.5 s into the excerpt, shifted by a random jitter
    of at most 1.5 s, and clamped to 0 because a negative offset cannot be
    read from the file.
    """
    if args.unique != 'multiple' or first_start < 3:
        return 0
    jitter = round(random.uniform(-1.5, 1.5), 2)
    return max(0.0, first_midl - 3.5 + jitter)


def _bounding_box(row, offset, duration, sr):
    """Return (x, y, width, height) of one annotation, as image fractions.

    x and width are fractions of `duration`; y and height are fractions of
    the 0..sr/2 frequency range, with y measured from the top of the image.
    In 'uniform' mode every box is vertically centred with height 0.8.
    """
    # NOTE(review): x and width are not clipped, so an annotation that lies
    # partly outside the excerpt yields values outside [0, 1] - confirm that
    # the training code tolerates this.
    x_pxl = (row.midl - offset) / duration
    width_pxl = (row.stop - row.start) / duration

    if args.mode == 'uniform':
        return x_pxl, 0.5, width_pxl, 0.8

    nyquist = sr / 2
    y_pxl = 1 - (row.midl_y / nyquist)
    height_pxl = (row.max_freq - row.min_freq) / nyquist
    if height_pxl > 1:
        height_pxl = 1
    elif height_pxl > y_pxl * 2:
        # The box would stick out above the image: move its centre down so
        # that its top edge sits on the top border.
        y_pxl = y_pxl + 0.5 * (height_pxl - y_pxl * 2)
    return x_pxl, y_pxl, width_pxl, height_pxl


def _prepare_output_dirs():
    """Create the dated image/label folders and return (images_dir, labels_dir).

    `exist_ok=True` makes the call safe when several worker processes reach
    it at the same time or when part of the tree already exists.
    """
    stamp = str(today.day) + '_' + str(today.month)
    images_dir = os.path.join(directory, 'images_' + stamp)
    labels_dir = os.path.join(directory, 'labels_' + stamp)
    for sub in [*(str(code) for code in list_espece.index), 'all']:
        os.makedirs(os.path.join(images_dir, sub), exist_ok=True)
    os.makedirs(labels_dir, exist_ok=True)
    return images_dir, labels_dir


def _export_annotated(image_path, boxes, out_path):
    """Draw the labelled boxes of `boxes` on a saved spectrogram and save it.

    Args:
        image_path: Path of the spectrogram image to annotate.
        boxes: DataFrame with columns id, x, y, width, height (image fractions).
        out_path: Path of the annotated image to write.
    """
    im = cv2.imread(image_path)
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    img_h, img_w = im.shape[0], im.shape[1]

    for box in boxes.itertuples(index=False):
        centre_x, centre_y = box.x * img_w, box.y * img_h
        box_w, box_h = box.width * img_w, box.height * img_h

        bottom_left = (int(centre_x - 0.5 * box_w), int(centre_y + 0.5 * box_h))
        top_right = (int(centre_x + 0.5 * box_w), int(centre_y - 0.5 * box_h))

        # Small filled tag hanging from the top-right corner, holding the id.
        tag_corner = (top_right[0] - 10, top_right[1] + 20)
        text_origin = (tag_corner[0], tag_corner[1] - 5)

        label = str(box.id)
        # The colour table is keyed by integer class id; wrap around if there
        # are more classes than colours.
        color_key = int(label) % len(colors)
        color = colors.loc[colors.species == color_key, 'color'].iloc[0]
        color = tuple(int(channel) for channel in color)

        cv2.rectangle(im, pt1=bottom_left, pt2=top_right, color=color, thickness=1)
        cv2.rectangle(im, pt1=tag_corner, pt2=top_right, color=color, thickness=-1)
        cv2.putText(im, label, text_origin, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

    plt.imshow(im)
    plt.savefig(out_path)
    plt.close()


def process(x):
    """Write the spectrogram images and YOLO label files of one recording.

    The annotations of the recording are consumed window by window: each
    window takes the rows whose centre lies at most 7 s after the start of
    the first remaining row, renders a DURATION-second spectrogram and writes
    one label line ``id x y width height`` per row. The image is saved in the
    folder of the window's last label and in ``all``. With --export, a copy
    with the boxes drawn on it is saved as well.

    Output files are named ``<path>_<count>_<window>`` so that the successive
    windows of one recording do not overwrite each other.

    Args:
        x: ``(count, (f, grp))`` as produced by ``enumerate(df.groupby('Path'))``:
            the rank of the recording, its path, and its annotation rows.

    Returns:
        None. All results are written to disk.
    """
    count, (f, grp) = x
    filename = str(f)
    duration = DURATION

    audio_path = _resolve_audio_path(filename)
    images_dir, labels_dir = _prepare_output_dirs()
    # Label -> class id, independent of how the lookup table is indexed.
    class_ids = dict(zip(data.espece, data.ind))
    base_name = filename.replace('/', '_').replace('.', '_')

    window_size = 1024
    window = np.hanning(window_size)
    window_idx = 0

    while len(grp) != 0:

        tab = grp[grp.midl <= grp.start.iloc[0] + 7]
        if len(tab) == 0:
            # Nothing selected (e.g. missing times): take all remaining rows
            # so that the loop is guaranteed to terminate.
            tab = grp
            print(tab)

        offset = _window_offset(tab.start.iloc[0], tab.midl.iloc[0])

        y, sr = librosa.load(audio_path, offset=offset, duration=duration, sr=None)
        stft = librosa.stft(y, n_fft=window_size, hop_length=512, window=window)
        # Flipped so that high frequencies are at the top of the image.
        log_spec = np.flipud(np.log10(np.abs(stft)))

        plt.imshow(log_spec, aspect="auto", interpolation=None, cmap='jet',
                   vmin=log_spec.mean(), vmax=log_spec.max())
        # Let the image fill the whole figure, so that box fractions are
        # fractions of the saved picture.
        plt.subplots_adjust(top=1, bottom=0, left=0, right=1)

        rows = [
            [str(class_ids[row.Code]), *_bounding_box(row, offset, duration, sr)]
            for _, row in tab.iterrows()
        ]
        fin = pd.DataFrame(rows, columns=['id', 'x', 'y', 'width', 'height'])

        # The image is filed under the label of the window's last annotation.
        species_dir = str(tab.Code.iloc[-1])
        grp = grp.drop(tab.index)

        name = base_name + '_' + str(count) + '_' + str(window_idx)
        name_file = os.path.join(labels_dir, name + '.txt')
        all_image = os.path.join(images_dir, 'all', name + '.jpg')

        plt.savefig(os.path.join(images_dir, species_dir, name + '.jpg'))
        fin.to_csv(name_file, sep=' ', header=False, index=False)
        plt.savefig(all_image)
        plt.close()

        if args.export is not None:
            annotated_dir = os.path.join(
                directory, 'images_annotes_' + str(today.day) + '_' + str(today.month))
            os.makedirs(annotated_dir, exist_ok=True)
            _export_annotated(all_image, fin, os.path.join(annotated_dir, name + '.jpg'))

        window_idx += 1

# Start the worker pool only when the file is run as a script: a process that
# merely imports this module (as worker processes may do, depending on the
# start method) must not launch a pool of its own.
if __name__ == '__main__':
    # One task per recording: (rank, (path, annotation rows of that path)).
    groups = df.groupby('Path')
    p_map(process, enumerate(groups), num_cpus=2, total=groups.ngroups)

    print('saved to ',directory)