Skip to content
Snippets Groups Projects
Select Git revision
  • 2824ad17aeaf4241a2040e0d88cdb71550de12b4
  • main default protected
  • V1
3 results

get_train_annot_YOLO.py

Blame
  • get_train_annot_YOLO.py 8.74 KiB
    import os
    import pandas as pd
    import librosa
    import numpy as np
    import matplotlib.pyplot as plt
    from p_tqdm import p_map
    import ipdb
    import random
    from datetime import date
    import argparse
    import cv2
    import matplotlib.patches as patches
    from matplotlib.patches import Rectangle
    from random import randrange
    from PIL import Image
    from mycolorpy import colorlist as mcp
    
    # Run date, captured once; its day and month are embedded in the names of
    # the output folders (images_<day>_<month>, labels_<day>_<month>, ...).
    today = date.today()
    
    def arg_directory(path):
        """argparse ``type=`` callback that accepts only existing directories.

        Returns ``path`` unchanged when it points to a directory; otherwise
        raises ``argparse.ArgumentTypeError`` so argparse reports a usage error.
        """
        if not os.path.isdir(path):
            raise argparse.ArgumentTypeError(f'`{path}` is not a valid path')
        return path
    
    # Command-line interface. Flags, types, choices and defaults are unchanged;
    # only the user-facing help text was corrected (typos, placeholder
    # description).
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description='Generate spectrogram images and YOLO label files from an annotation table',
    )
    parser.add_argument('-f', '--filename_path', type=str, required=True,
                        help='Path and name of the file containing the annotations')
    parser.add_argument('-p', '--path_to_data', type=arg_directory, required=True,
                        help='Path of the folder that contains the recordings')
    parser.add_argument('-d', '--directory', type=arg_directory, required=True,
                        help='Directory in which spectrograms and .txt files will be stored')
    parser.add_argument('-m', '--mode', type=str, choices=['uniform', 'personalized'], required=True,
                        help='Choose the mode to calculate the y and height value')
    parser.add_argument('-u', '--unique', type=str, choices=['unique', 'multiple'], required=True,
                        help='unique for only one spectrogram per file, multiple for multiple spectrograms')
    parser.add_argument('-c', '--columns_name', type=str, required=True,
                        help='Name of the column that contains the path')
    # Any value given to --export enables the annotated-image export; the value
    # itself is only tested against None.
    parser.add_argument('--export', type=str, default=None, required=False,
                        help='To export the position of the bounding box on the spectrogram')
    args = parser.parse_args()
    
    directory = args.directory

    # Length in seconds of the audio excerpt loaded for each spectrogram; also
    # the upper bound on the duration of an annotation that is kept.
    DURATION = 8
    # Only the NB_CLASS most frequent labels are kept.
    NB_CLASS = 5

    df = pd.read_csv(args.filename_path, low_memory=False)

    # Map the alternative column names onto the names used by the rest of the
    # script. Columns that are absent are simply ignored by rename().
    df = df.rename(columns={
        'label': 'Code',
        'annotation_initial_time': 'start',
        'annotation_final_time': 'stop',
        'duree': 'd_annot',
        'min_frequency': 'min_freq',
        'max_frequency': 'max_freq',
        'avg_frequency': 'midl_y',
    })

    # Rank labels by number of rows (size() is independent of NaNs in any other
    # column) and keep the most frequent ones.
    tab = df.groupby('Code').size().sort_values(ascending=False)[:NB_CLASS]
    # .copy() so that the column assignments below modify an independent frame
    # rather than a slice of the frame that was read from disk.
    df = df[df.Code.isin(tab.index)].copy()

    if {'max_freq', 'min_freq'}.issubset(df.columns):
        # Assign the result back: an in-place fillna on a column selection is a
        # chained assignment and does not update df under pandas Copy-on-Write.
        df['max_freq'] = df['max_freq'].fillna(9000)
        df['min_freq'] = df['min_freq'].fillna(1000)
    else:
        # No usable frequency bounds in the table: fall back to one fixed band.
        df['max_freq'] = 9000
        df['min_freq'] = 1000
        df['midl_y'] = 5000

    df['d_annot'] = df.stop - df.start
    df['midl'] = (df.stop + df.start)/2
    df['Path'] = df[args.columns_name]

    # An annotation must be shorter than the rendered excerpt.
    df = df[df.d_annot < DURATION]
    # Keep the old index as a leading 'index' column (no drop=True): the species
    # ranking further down sorts on df.columns[0].
    df = df.reset_index()
    
    # Species that survived the filtering, most frequent first (ranked on the
    # non-null count of the first column of df).
    list_espece = df.groupby('Code').count().sort_values(df.columns[0],ascending = False)

    # Species -> class id table ('espece', 'ind'). It is assembled from one
    # single-row frame per species so that every row keeps index label 0,
    # exactly as the original row-by-row concatenation produced; label-based
    # lookups on a filtered row therefore keep working. Building the list first
    # avoids concatenating onto an empty frame, which pandas deprecates.
    species_rows = [
        pd.DataFrame([[esp, ind]], columns = ['espece','ind'])
        for ind, esp in enumerate(list_espece.index)
    ]
    if species_rows:
        data = pd.concat(species_rows)
    else:
        data = pd.DataFrame(columns = ['espece','ind'])

    liste_espece = data.espece
    # os.path.join instead of string concatenation: without a trailing
    # separator on `directory` the file used to be written next to the output
    # folder (e.g. 'outliste_especes.csv') instead of inside it.
    liste_espece.to_csv(os.path.join(directory, 'liste_especes.csv'),index = False)

    print('\n',data)
    
    # Alternative palette, currently disabled:
    #color = mcp.gen_color(cmap = "Wistia", n= len(list_espece))

    # Table of 30 random RGB triplets, one per integer id in 'species'.
    # Every row is its own single-row frame, so all rows share index label 0.
    palette_parts = [pd.DataFrame(columns = ['color', 'species'])]
    for species_id in range(30):
        # Channels are drawn in r, g, b order, each in [0, 254].
        rgb = tuple(randrange(255) for _ in range(3))
        palette_parts.append(pd.DataFrame([[rgb, species_id]], columns = ['color', 'species']))
    colors = pd.concat(palette_parts)
    
    
    def _window_offset(first_start, first_midl, jitter):
        """Return the position (s) in the recording where the excerpt starts."""
        if args.unique != 'multiple' or first_start < 3:
            return 0
        # Put the first annotation roughly in the middle of the excerpt, shifted
        # by the random jitter. Clamp at 0: the jitter can push the value below
        # the start of the file, which is not a valid load offset.
        return max(0, first_midl - 3.5 + jitter)


    def _yolo_box(row, offset, duration, sr):
        """Return (x, y, width, height) of one annotation, relative to the image."""
        x_pxl = (row.midl - offset) / duration
        width_pxl = (row.stop - row.start) / duration

        if args.mode == 'uniform':
            # Same vertical band for every box.
            return x_pxl, 0.5, width_pxl, 0.8

        nyquist = sr / 2
        # The spectrogram is flipped vertically, so y is measured from the top.
        y_pxl = 1 - (row.midl_y / nyquist)
        height_pxl = (row.max_freq - row.min_freq) / nyquist
        if height_pxl > 1:
            height_pxl = 1
        elif height_pxl > y_pxl * 2:
            y_pxl = y_pxl + 0.5 * (height_pxl - y_pxl * 2)
        return x_pxl, y_pxl, width_pxl, height_pxl


    def _ensure_output_dirs(images_dir, labels_dir):
        """Create the label folder and one image folder per species plus 'all'."""
        # exist_ok makes this safe when folders already exist (even partially)
        # and when several workers create them at the same time.
        for sub in [*list_espece.index, 'all']:
            os.makedirs(os.path.join(images_dir, str(sub)), exist_ok=True)
        os.makedirs(labels_dir, exist_ok=True)


    def _export_annotated(image_path, fin, annotated_dir, name):
        """Draw the boxes of `fin` on the saved spectrogram and save the result."""
        im = cv2.imread(image_path)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        H, W = im.shape[0], im.shape[1]

        for box in fin.itertuples(index=False):
            # Relative YOLO coordinates -> pixels.
            x, y, w, h = box.x * W, box.y * H, box.width * W, box.height * H
            bottom_left = (int(x - 0.5 * w), int(y + 0.5 * h))
            top_right = (int(x + 0.5 * w), int(y - 0.5 * h))

            # Small filled tag in the top-right corner of the box, and the
            # origin of the text written inside it.
            tag_corner = (top_right[0] - 10, top_right[1] + 20)
            text_origin = (tag_corner[0], tag_corner[1] - 5)

            label = str(box.id)
            # `colors.species` holds integers while the label is a string, so
            # convert before comparing and extract the single tuple.
            picked = colors.loc[colors.species == int(label) % len(colors), 'color'].iloc[0]
            color = tuple(int(c) for c in picked)

            cv2.rectangle(im, pt1=bottom_left, pt2=top_right, color=color, thickness=1)
            cv2.rectangle(im, pt1=tag_corner, pt2=top_right, color=color, thickness=-1)
            cv2.putText(im, label, text_origin, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255), 1)

        plt.imshow(im)
        os.makedirs(annotated_dir, exist_ok=True)
        plt.savefig(os.path.join(annotated_dir, str(name+'.jpg')))


    def process(x):
        """Render the spectrograms and YOLO label files of one recording.

        Parameters
        ----------
        x : tuple
            ``(count, (f, grp))`` as produced by ``enumerate(df.groupby('Path'))``:
            a running number, the value of the 'Path' column (used as the audio
            file name) and the annotations of that recording.

        The annotations are consumed window by window: a window holds every
        remaining annotation whose midpoint is at most 7 s after the start of
        the first remaining one. For each window an excerpt of DURATION seconds
        is loaded, its log-magnitude STFT is saved as a .jpg (in the folder of
        the species of the window's last annotation and in 'all'), and one
        ``id x y width height`` line per annotation is written to a .txt file.
        When ``--export`` is set, a copy with the boxes drawn is saved as well.

        Fixes compared with the previous version:
        * in 'multiple' mode each window gets its own file name (the second and
          later windows of a recording used to overwrite the first);
        * the excerpt offset is never negative;
        * output folders are created with ``exist_ok`` instead of a bare
          ``except`` around ``os.mkdir``;
        * the export path reads the 'width'/'height' columns and resolves the
          box colour correctly;
        * the class id is read positionally instead of through index label 0;
        * the unused ``plt.specgram`` computation is gone.
        """
        count, (f, grp) = x
        filename = str(f)
        duration = DURATION

        stamp = str(today.day)+'_'+str(today.month)
        images_dir = os.path.join(directory, str('images_'+stamp))
        labels_dir = os.path.join(directory, str('labels_'+stamp))
        annotated_dir = os.path.join(directory, str('images_annotes_'+stamp))
        _ensure_output_dirs(images_dir, labels_dir)

        window_size = 1024
        window = np.hanning(window_size)
        window_idx = 0

        while len(grp) != 0:

            tab = grp[grp.midl <= grp.start.iloc[0]+7]
            if len(tab)==0:
                # Nothing selected (e.g. missing times): take everything that is
                # left so the loop is guaranteed to terminate.
                tab = grp
                print(tab)

            rd = round(random.uniform(-1.5,1.5),2)
            offset = _window_offset(tab.start.iloc[0], tab.midl.iloc[0], rd)

            y, sr = librosa.load(filename, offset = offset, duration = duration, sr = None)
            stft = librosa.core.spectrum.stft(y, n_fft=window_size, hop_length=512, window=window)
            # Flip so that low frequencies are at the bottom of the image.
            log_spec = np.flipud(np.log10(np.abs(stft)))

            # Colour scale from the mean to the maximum of the log spectrum.
            plt.imshow(log_spec, aspect = "auto", interpolation = None, cmap = 'jet',
                       vmin = log_spec.mean(), vmax = log_spec.max())
            plt.subplots_adjust(top=1, bottom=0, left=0, right=1)

            boxes = []
            for _, row in tab.iterrows():
                class_id = str(data.loc[data.espece == row.Code,'ind'].iloc[0])
                boxes.append([class_id, *_yolo_box(row, offset, duration, sr)])
            fin = pd.DataFrame(boxes, columns = ['id','x', 'y', 'width', 'height'])

            grp = grp.drop(tab.index)

            # File naming and the species folder follow the last annotation of
            # the window.
            last = tab.iloc[-1]
            name = str(last.Path.replace('/','_').replace('.','_')+'_'+str(count))
            if args.unique == 'multiple' and window_idx > 0:
                # The first window keeps the historical name; later windows get
                # a suffix so they no longer overwrite it.
                name = name+'_'+str(window_idx)
            # NOTE(review): in 'unique' mode all windows still share one name and
            # offset 0, so the last window overwrites the earlier ones - confirm
            # which annotations are meant to be kept in that mode.

            name_file = os.path.join(labels_dir, str(name+'.txt'))
            all_image = os.path.join(images_dir, 'all', str(name+'.jpg'))

            plt.savefig(os.path.join(images_dir, str(last.Code), str(name+'.jpg')))
            fin.to_csv(name_file,sep =' ',header=False,index=False)
            plt.savefig(all_image)

            plt.close()

            if args.export is not None:
                _export_annotated(all_image, fin, annotated_dir, name)

            plt.close()
            window_idx += 1
    
    # One task per recording (rows grouped on 'Path'), handled by two worker
    # processes; each task receives (running number, (path, annotations)).
    per_recording = df.groupby('Path')
    p_map(process, enumerate(per_recording), num_cpus=2, total = len(per_recording))

    print('saved to ',directory)