Skip to content
Snippets Groups Projects
Select Git revision
  • 4f6e95c413e1c334814947b820c184e9fd2e19ff
  • main default protected
  • V1
3 results

get_train_annot_YOLO.py

Blame
  • get_train_annot_YOLO.py 7.54 KiB
    import os
    import pandas as pd
    import librosa
    import numpy as np
    import matplotlib.pyplot as plt
    from p_tqdm import p_map
    import ipdb
    import random
    from datetime import date
    import argparse
    import matplotlib.patches as patches
    from matplotlib.patches import Rectangle
    from PIL import Image
    from mycolorpy import colorlist as mcp
    
    # Run date, read once at start-up; its day and month are embedded in the
    # names of the dated output folders built in process().
    today = date.today()
    
    def arg_directory(path):
        """Validate a command-line value that must name an existing directory.

        Intended as an argparse ``type=`` callable: returns ``path`` unchanged
        when ``os.path.isdir`` accepts it, and raises
        ``argparse.ArgumentTypeError`` otherwise.
        """
        if not os.path.isdir(path):
            raise argparse.ArgumentTypeError(f'`{path}` is not a valid path')
        return path
    
    # Command-line interface. Every option except --export is mandatory; the two
    # directory options are validated by arg_directory while parsing.
    parser = argparse.ArgumentParser(
        description='TODO',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    # Annotation table read with pandas.read_csv.
    parser.add_argument(
        '-f', '--filename_path',
        type=str,
        required=True,
        help='Path and name of the file containing the annotations',
    )
    parser.add_argument(
        '-p', '--path_to_data',
        type=arg_directory,
        required=True,
        help='Path of the folder that contain the recordings',
    )
    # Root folder for everything the script writes.
    parser.add_argument(
        '-d', '--directory',
        type=arg_directory,
        required=True,
        help='Directory to wich spectrograms and .txt files will be stored',
    )
    # 'uniform' uses a fixed box height/centre, 'personalized' uses the
    # frequency columns of each annotation.
    parser.add_argument(
        '-m', '--mode',
        type=str,
        choices=['uniform', 'personalized'],
        required=True,
        help='Choose the mode to calculate the y and height value',
    )
    parser.add_argument(
        '-u', '--unique',
        type=str,
        choices=['unique', 'multiple'],
        required=True,
        help='unique for only one spectrogram per file, multple for multiple spectrogram',
    )
    # Name of the annotation-table column copied into df['Path'].
    parser.add_argument(
        '-c', '--columns_name',
        type=str,
        required=True,
        help='Name of the column that contain the path',
    )
    # Any value other than None switches on the annotated-image export.
    parser.add_argument(
        '--export',
        type=str,
        default=None,
        required=False,
        help='To export the position of the bounding box on the spectrogram',
    )
    args = parser.parse_args()
    
    # Output location given with -d/--directory; every output path below is
    # built from it.
    directory = args.directory
    
    # Length, in seconds, of the audio excerpt loaded for each spectrogram
    # (passed to librosa.load as `duration` in process()).
    DURATION = 8
    # Number of most frequent annotation codes kept as classes.
    NB_CLASS = 5
    
    # Load the annotation table.
    df = pd.read_csv(args.filename_path, low_memory=False)

    # Map the column names of the annotation export onto the names used by the
    # rest of the script (columns that are absent are simply ignored).
    df = df.rename(columns={
        'label': 'Code',
        'annotation_initial_time': 'start',
        'annotation_final_time': 'stop',
        'duree': 'd_annot',
        'min_frequency': 'min_freq',
        'max_frequency': 'max_freq',
        'avg_frequency': 'midl_y',
    })

    # Keep only the NB_CLASS most frequent codes. size() counts rows per code,
    # so the ranking cannot be skewed by missing values in some other column.
    tab = df.groupby('Code').size().sort_values(ascending=False)[:NB_CLASS]
    # .copy() makes the filtered frame independent, so the column assignments
    # below really modify `df` instead of a temporary slice.
    df = df[df.Code.isin(tab.index)].copy()

    # Frequency bounds fall back to 1000 / 9000 (same unit as the sample rate),
    # column by column: a missing column is created, empty cells are filled.
    # Assigning the result of fillna() back (rather than calling it with
    # inplace=True on df[col]) keeps this working under pandas Copy-on-Write.
    for column, default in (('min_freq', 1000), ('max_freq', 9000)):
        if column in df.columns:
            df[column] = df[column].fillna(default)
        else:
            df[column] = default

    # The box centre falls back to the middle of the band, which is 5000 when
    # both bounds are at their defaults.
    band_centre = (df['min_freq'] + df['max_freq']) / 2
    if 'midl_y' in df.columns:
        df['midl_y'] = df['midl_y'].fillna(band_centre)
    else:
        df['midl_y'] = band_centre

    # Duration and temporal centre of every annotation, plus the recording path
    # taken from the user-selected column.
    df['d_annot'] = df.stop - df.start
    df['midl'] = (df.stop + df.start) / 2
    df['Path'] = df[args.columns_name]

    # An annotation at least as long as one excerpt cannot be framed; drop it.
    df = df[df.d_annot < DURATION]
    # Give rows unique labels 0..n-1. Deliberately not drop=True: the previous
    # labels stay as the leading 'index' column, exactly as before.
    df = df.reset_index()
    
    # Species ranked by number of retained annotations, most frequent first.
    # size() counts rows directly instead of counting the non-null cells of
    # whichever column happens to come first in df.
    list_espece = (
        df.groupby('Code').size().sort_values(ascending=False).to_frame('count')
    )

    # Class table: one row per species with its class id (its rank above).
    # Built in one go instead of concatenating inside a loop. Every row keeps
    # index label 0, as the loop-built table did, so that label-based `[0]`
    # lookups on a filtered column of `data` keep working.
    data = pd.DataFrame({
        'espece': list(list_espece.index),
        'ind': list(range(len(list_espece))),
    })
    data.index = [0] * len(data)

    liste_espece = data.espece
    # os.path.join puts the file inside `directory` even when the option was
    # given without a trailing separator (plain concatenation produced a
    # sibling file such as "<dir>liste_especes.csv" in that case).
    liste_espece.to_csv(os.path.join(directory, 'liste_especes.csv'), index=False)

    print('\n', data)

    # One outline colour per class, indexed by class id.
    color = mcp.gen_color(cmap="Wistia", n=len(list_espece))
    
    def _dated_dir(prefix):
        """Return ``<directory>/<prefix>_<day>_<month>`` for the run date."""
        return os.path.join(directory, f'{prefix}_{today.day}_{today.month}')

    def _prepare_output_dirs():
        """Create the dated image and label folders; return (images_dir, labels_dir).

        makedirs(exist_ok=True) is safe when several workers start at once and
        when part of the tree already exists from an earlier run.
        """
        images_dir = _dated_dir('images')
        labels_dir = _dated_dir('labels')
        for sub in [*list_espece.index, 'all']:
            os.makedirs(os.path.join(images_dir, sub), exist_ok=True)
        os.makedirs(labels_dir, exist_ok=True)
        return images_dir, labels_dir

    def _window_offset(tab):
        """Return the start time, in seconds, of the excerpt framing ``tab``."""
        if args.unique != 'multiple':
            return 0
        if tab.start.iloc[0] < 3:
            return 0
        # Place the first annotation around the middle of the excerpt, with a
        # random shift; never ask for audio before the start of the file.
        jitter = round(random.uniform(-1.5, 1.5), 2)
        return max(0.0, tab.midl.iloc[0] - 3.5 + jitter)

    def _yolo_box(row, offset, duration, sr):
        """Return ``[class id, x, y, width, height]`` for one annotation row.

        x and width are fractions of the excerpt duration; y and height are
        fractions of the 0..sr/2 band, with y measured from the top of the image.
        """
        x_pxl = (row.midl - offset) / duration
        width_pxl = (row.stop - row.start) / duration

        if args.mode == 'uniform':
            height_pxl = 0.8
            y_pxl = 0.5
        else:
            nyquist = sr / 2
            y_pxl = 1 - (row.midl_y / nyquist)
            height_pxl = (row.max_freq - row.min_freq) / nyquist
            if height_pxl > 1:
                height_pxl = 1
            elif height_pxl > y_pxl * 2:
                # The box would stick out above the image: move its centre
                # down so that its upper edge sits on the top border.
                y_pxl = y_pxl + 0.5 * (height_pxl - y_pxl * 2)

        # Positional access: works whatever index labels `data` carries.
        class_id = str(data.loc[data.espece == row.Code, 'ind'].iloc[0])
        return [class_id, x_pxl, y_pxl, width_pxl, height_pxl]

    def process(x):
        """Write the spectrogram images and YOLO label files of one recording.

        Parameters
        ----------
        x : tuple
            ``(count, (f, grp))`` as produced by ``enumerate(df.groupby('Path'))``:
            the running group number, the recording path and the annotation
            rows of that recording.

        For each batch of annotations whose centre lies within 7 s of the first
        remaining annotation's start, a DURATION-second excerpt is loaded, its
        log-magnitude STFT is saved as a .jpg (in the folder of the batch's last
        species and in 'all') and the boxes are saved as a space-separated .txt.
        When ``--export`` is set, a copy with the boxes drawn on it is saved too.
        Returns None.
        """
        count, (f, grp) = x
        filename = str(f)
        # The path column is used as-is when it resolves; otherwise it is tried
        # relative to --path_to_data.
        if not os.path.isfile(filename):
            candidate = os.path.join(args.path_to_data, filename)
            if os.path.isfile(candidate):
                filename = candidate
        duration = DURATION

        images_dir, labels_dir = _prepare_output_dirs()

        window_size = 1024
        window = np.hanning(window_size)

        window_idx = 0
        while len(grp) != 0:

            tab = grp[grp.midl <= grp.start.iloc[0] + 7]
            if len(tab) == 0:
                tab = grp
                print(tab)

            offset = _window_offset(tab)

            y, sr = librosa.load(filename, offset=offset, duration=duration, sr=None)
            stft = librosa.core.spectrum.stft(y, n_fft=window_size, hop_length=512, window=window)
            # Flipped so that high frequencies are at the top of the image.
            log_spec = np.flipud(np.log10(np.abs(stft)))
            n_freq, n_time = log_spec.shape

            plt.imshow(log_spec, aspect="auto", interpolation=None, cmap='jet',
                       vmin=log_spec.mean(), vmax=log_spec.max())
            plt.subplots_adjust(top=1, bottom=0, left=0, right=1)

            boxes = [_yolo_box(row, offset, duration, sr) for _, row in tab.iterrows()]
            fin = pd.DataFrame(boxes, columns=['id', 'x', 'y', 'width', 'height'])
            # The last annotation of the batch names the files and picks the
            # species folder.
            last_row = tab.iloc[-1]
            grp = grp.drop(tab.index)

            name = last_row.Path.replace('/', '_').replace('.', '_') + '_' + str(count)
            # In 'multiple' mode every further excerpt of the same recording
            # gets its own suffix; without it each excerpt overwrote the
            # previous one. 'unique' mode keeps a single name per recording.
            if args.unique == 'multiple' and window_idx > 0:
                name = f'{name}_{window_idx}'
            window_idx += 1

            fin.to_csv(os.path.join(labels_dir, name + '.txt'), sep=' ', header=False, index=False)
            plt.savefig(os.path.join(images_dir, last_row.Code, name + '.jpg'))
            plt.savefig(os.path.join(images_dir, 'all', name + '.jpg'))

            if args.export is not None:
                axes = plt.gca()
                for box in fin.itertuples(index=False):
                    try:
                        edge = color[int(box.id)]
                    except IndexError:
                        # Report and skip instead of opening a debugger inside
                        # a worker process.
                        print(f'no colour for class {box.id}; box not drawn on {name}')
                        continue
                    # Image coordinates: the origin is the top-left corner and
                    # the image is n_time x n_freq cells, so the anchor is the
                    # centre minus half the size on both axes.
                    axes.add_patch(Rectangle(
                        ((box.x - 0.5 * box.width) * n_time,
                         (box.y - 0.5 * box.height) * n_freq),
                        box.width * n_time,
                        box.height * n_freq,
                        linewidth=3, edgecolor=edge, facecolor='none'))
                # Saved once, after all boxes are drawn.
                annotated_dir = _dated_dir('images_annotes')
                os.makedirs(annotated_dir, exist_ok=True)
                plt.savefig(os.path.join(annotated_dir, name + '.jpg'))

            plt.close()
    
    # Run process() on every recording (one group per distinct 'Path'), numbered
    # in group order, through p_map with num_cpus=2; `total` sizes the progress bar.
    recordings = df.groupby('Path')
    p_map(process, enumerate(recordings), num_cpus=2, total=len(recordings))

    print('saved to ',directory)