improve code

2afd11a9 · Stephane Chavin · bed2c86e · 2afd11a9 · 2afd11a9 · 2afd11a9
Commit 2afd11a9 authored Oct 24, 2023 by Stephane Chavin
--- a/README.md
+++ b/README.md
 # YOLO-DYNI

-
-
 Ce git a été créé avec comme objectif une prise en main de YOLOV5 plus facile.
 Il contient notamment un script permettant d'extraire les spectrogrammes de plusieurs enregistrements ([get_spectrogram.py](https://gitlab.lis-lab.fr/stephane.chavin/yolo-dyni/-/blob/main/get_spectrogram.py)), un script nécessaire à la conversion des annotations LabelMe vers YOLO ([labelme2yolo.py](https://gitlab.lis-lab.fr/stephane.chavin/yolo-dyni/-/blob/main/labelme2yolo.py)), un script pour convertir des annotations d'un dataframe vers YOLO ([get_train_annot_YOLO.py](https://gitlab.lis-lab.fr/stephane.chavin/yolo-dyni/-/blob/main/get_train_annot_YOLO.py/)), un script permettant de séparer le train et la validation de manière équilibrée ([get_train_val_YOLO.py](https://gitlab.lis-lab.fr/stephane.chavin/yolo-dyni/-/blob/main/get_train_val_YOLO.py)) et un script qui permet de compiler les détections, d'un modèle entrainé, dans un dataframe ([get_yolo_detection.py](https://gitlab.lis-lab.fr/stephane.chavin/yolo-dyni/-/blob/main/get_yolo_detection.py)).


--- a/get_json_file_YOLO.py
+++ b/get_json_file_YOLO.py
@@ -11,41 +11,43 @@ def arg_directory(path):
    else:
        raise argparse.ArgumentTypeError(f'`{path}` is not a valid path')

-parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='TODO')
-parser.add_argument('-p','--path_to_json', type=arg_directory, help = 'Path of the folder that contain the .json',required=True)
-parser.add_argument('-i','--path_to_img', type=arg_directory, help = 'Path of the folder that contain the .jpg',required=True)
-parser.add_argument('-d','--directory', type=arg_directory, help = 'Directory to wich modified .json files will be stored',required=True)
-args = parser.parse_args()
-
-filename = args.path_to_json
-out_file = args.directory
-img_path = args.path_to_img
-
-liste_file = os.listdir(filename)
-liste_file = pd.DataFrame(liste_file, columns =['fn'])
-liste_file['type'] = liste_file.fn.str.split('.').str[-1]
-liste_file = liste_file[liste_file.type == 'json']
-liste_file.reset_index(inplace = True)
+def process_json_files(json_dir, img_dir, output_dir):
+    json_files = [f for f in os.listdir(json_dir) if f.endswith('.json')]
    
-for i in range (len(liste_file)):
-    if liste_file.fn[i][0] == '.':
-        liste_file = liste_file.drop(i)
+    for json_file in json_files:
+        if json_file.startswith('.'):
+            continue

-liste_file = liste_file.reset_index()
+        json_path = os.path.join(json_dir, json_file)
+        img_path = os.path.join(img_dir, json_file.replace('.json', '.jpg'))

-for i, row in liste_file.iterrows():
-    if len(row.fn) > 30:
-        data = labelme.LabelFile.load_image_file(os.path.join(img_path,str(row.fn[:-4]+'jpg')))
-        image_data = base64.b64encode(data).decode('utf-8')
-    else:
+        if not os.path.exists(img_path):
            continue
+
        try:
-        len(data)
-    except TypeError:
+            with open(img_path, 'rb') as img_file:
+                image_data = base64.b64encode(img_file.read()).decode('utf-8')
+        except FileNotFoundError:
            continue
-    f = open(filename+row.fn,)
-    get_data = json.load(f)
-    get_data['imageData'] = image_data
-    get_data['imagePath'] = img_path+row.fn[:-4]+'jpg'
-    with open(out_file+row.fn, 'w') as f:
-        json.dump(get_data, f, indent=4)
+
+        with open(json_path, 'r') as f:
+            json_data = json.load(f)
+
+        json_data['imageData'] = image_data
+        json_data['imagePath'] = img_path
+
+        output_path = os.path.join(output_dir, json_file)
+        with open(output_path, 'w') as f:
+            json.dump(json_data, f, indent=4)
+
+def main():
+    """Parse the command-line arguments and run `process_json_files`.
+
+    Three directories are required, each validated by `arg_directory`:
+    the folder of .json files, the folder of .jpg images and the folder
+    in which the modified .json files are written.
+    """
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+        description='Embed each .jpg image (base64-encoded) and its path into the matching .json file',
+    )
+    parser.add_argument('-p', '--path_to_json', type=arg_directory, help='Path to the folder containing the .json files', required=True)
+    parser.add_argument('-i', '--path_to_img', type=arg_directory, help='Path to the folder containing the .jpg images', required=True)
+    parser.add_argument('-d', '--directory', type=arg_directory, help='Directory to which modified .json files will be stored', required=True)
+    args = parser.parse_args()
+
+    process_json_files(args.path_to_json, args.path_to_img, args.directory)
+
+if __name__ == "__main__":
+    main()
--- a/get_spectrogram.py
+++ b/get_spectrogram.py
 import os
 import librosa
-import ipdb
 import glob
 import argparse
 import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
 from p_tqdm import p_map
-from tqdm import tqdm

 import warnings
 warnings.filterwarnings('ignore')
@@ -18,69 +16,65 @@ def arg_directory(path):
    else:
        raise argparse.ArgumentTypeError(f'`{path}` is not a valid path')

-parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='TODO')
-parser.add_argument('-f','--file', type=str,help = 'Name of the file that contain the recording to print')
-parser.add_argument('-p','--path_to_data', type=arg_directory, help = 'Path of the folder that contain the recordings')
-parser.add_argument('-d','--directory', type=arg_directory, help = 'Directory to wich spectrogram will be stored')
-parser.add_argument('-m','--mode', type=str,choices=['unique','multiple'], help = 'Direction of the saved spectrogram')
-parser.add_argument('-n','--columns_name', type=str, help = 'Name of the columns that contain the path of the .wav')
-parser.add_argument('-i','--input', type=str, choices=['file','folder'], help = 'Choose "file" if you have a .csv file or "folder" to export spectrogram from all the .wav of a folder')
-args = parser.parse_args()
+def create_spectrogram(y, directory, filename, offset, duration):
+    window_size = 1024
+    window = np.hanning(window_size)
+    stft = librosa.core.spectrum.stft(y, n_fft=window_size, hop_length=512, window=window)

-path_to_data = args.path_to_data
-direction = args.directory
-folder = 'Spectrogram/'
+    plt.close()
+    plt.figure()

-#PARAMETERS
-DURATION = 8
-OVERLAP = 2
+    log_stft = np.log10(np.abs(stft))
+    vmin, vmax = log_stft.min(), log_stft.max()

-if args.mode == 'multiple':
-    NB_IMG_PER_REC = 30     
-elif args.mode == 'unique': 
-    NB_IMG_PER_REC = 1
+    plt.imshow(log_stft[::-1], aspect="auto", interpolation=None, cmap='jet', vmin=vmin, vmax=vmax)
+    plt.subplots_adjust(top=1, bottom=0, left=0, right=1)

-if args.input == 'file':
-    df = pd.read_csv(args.file,low_memory=False)
-    df['Path'] = df[args.columns_name]
-elif args.input == 'folder':
-    df = pd.DataFrame(glob.glob(os.path.join(path_to_data,'*')),columns = ['Path'])
+    name = os.path.join(directory, 'Spectrogram', f"{filename.replace('/', '_').split('.')[0]}_{offset}")
    
-def process(x):
-    _, (i) = x
+    try:
+        plt.savefig(name + '.jpg')
+    except FileNotFoundError:
+        os.makedirs(os.path.join(directory, 'Spectrogram'), exist_ok=True)
+        plt.savefig(name + '.jpg')

-    for count, j in enumerate(range (NB_IMG_PER_REC)): #30*8 secondes - 30*2 secondes (overlap) = 180 secondes affichées sur 30 images : n'affiche que les 3 premières minutes d'un enregistrement
+def process_recordings(args, img_per_rec=1, directory='.'):
+    """Export the spectrogram images of one recording.
+
+    `args` is one item of `enumerate(df.groupby('Path'))`, i.e.
+    `(index, (path, group))`. Windows of 8 units of time, each starting 6 units
+    after the previous one (2 of overlap), are loaded with `librosa.load` and
+    handed to `create_spectrogram`. A window that cannot be loaded or plotted
+    is reported by printing the file name, and the loop moves on.
+
+    img_per_rec: number of windows (images) exported for the recording.
+    directory: folder passed to `create_spectrogram` as the output location.
+    """
+    # `args` is a plain tuple, so the run settings cannot be read from it as
+    # attributes; they are received as keyword arguments instead.
+    _, (i) = args
+    duration = 8
+    overlap = 2

+    for count in range(img_per_rec):
+        offset = count * (duration - overlap)
        filename = str(i[0])
-        offset = count * (DURATION - OVERLAP)
+        
        try:
-            y, sr = librosa.load(filename, offset = offset, duration = DURATION, sr = None)
+            y, _ = librosa.load(filename, offset=offset, duration=duration, sr=None)
+            create_spectrogram(y, directory, filename, offset, duration)
        except Exception:
            print(filename)
-            continue
-        window_size = 1024
-        window = np.hanning(window_size)
-        stft  = librosa.core.spectrum.stft(y, n_fft=window_size, hop_length=512, window=window)
-
-        plt.close()
-        plt.figure()
-
-        vmin = np.flipud(np.log10(np.abs(stft))).mean()
-        vmax = np.flipud(np.log10(np.abs(stft))).max()

-        plt.imshow(np.flipud(np.log10(np.abs(stft))),aspect = "auto",interpolation = None,cmap = 'jet',vmin = vmin,vmax = vmax)
-
-        plt.subplots_adjust(top=1, bottom=0, left=0, right=1)
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='TODO')
+    parser.add_argument('-f', '--file', type=str, help='Name of the file that contains the recording to print')
+    parser.add_argument('-p', '--path_to_data', type=arg_directory, help='Path of the folder that contains the recordings', required=True)
+    parser.add_argument('-d', '--directory', type=arg_directory, help='Directory to which spectrograms will be stored', required=True)
+    parser.add_argument('-m', '--mode', type=str, choices=['unique', 'multiple'], help='Direction of the saved spectrogram')
+    parser.add_argument('-n', '--columns_name', type=str, help='Name of the columns that contain the path of the .wav')
+    parser.add_argument('-i', '--input', type=str, choices=['file', 'folder'], help='Choose "file" if you have a .csv file or "folder" to export spectrogram from all the .wav of a folder')
+    args = parser.parse_args()

-        name = str(i[0].replace('/','_').split('.')[0]+'_'+str(offset)) #count
-        try :
-            plt.savefig(os.path.join(direction,folder, str(name+'.jpg')))
+    # '-m' is optional: anything other than 'multiple' exports a single image,
+    # so `img_per_rec` is always bound.
+    img_per_rec = 30 if args.mode == 'multiple' else 1

-        except FileNotFoundError:
-            print('creating the directory : ',os.path.join(direction,folder))
-            os.mkdir(os.path.join(direction,folder))
-            plt.savefig(os.path.join(direction,folder, str(name+'.jpg')))
+    path_to_data = args.path_to_data

+    if args.input == 'file':
+        df = pd.read_csv(args.file, low_memory=False)
+        df['Path'] = df[args.columns_name]
+    elif args.input == 'folder':
+        df = pd.DataFrame(glob.glob(os.path.join(path_to_data, '*'), recursive=True), columns=['Path'])
+    else:
+        # '-i' is optional for argparse, but without it there is nothing to process
+        parser.error('--input is required: choose "file" or "folder"')

-p_map(process, enumerate(df.groupby('Path')), num_cpus=1, total = len(df.groupby('Path'))) 
-print('saved to ',str(directory+folder))
+    # `enumerate` accepts no extra keyword arguments; the per-run settings are
+    # bound to the worker with `partial` instead.
+    from functools import partial
+    worker = partial(process_recordings, img_per_rec=img_per_rec, directory=args.directory)
+    groups = df.groupby('Path')
+    p_map(worker, enumerate(groups), num_cpus=1, total=len(groups))
+    print(f'Saved to {args.directory}/Spectrogram')
--- a/get_time_freq_detection.py
+++ b/get_time_freq_detection.py
 import pandas as pd
 import os
-import ipdb
-from tqdm import tqdm
 import argparse
 from datetime import date
+from tqdm import tqdm

 def arg_directory(path):
    if os.path.isdir(path):
@@ -11,47 +10,44 @@ def arg_directory(path):
    else:
        raise argparse.ArgumentTypeError(f'`{path}` is not a valid path')

-parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='TODO')
-parser.add_argument('-p','--path_to_data', type=arg_directory, help = 'Path of the folder that contain the .txt files',required=True)
-parser.add_argument('-d','--directory', type=arg_directory, help = 'Directory to wich the dataframe will be stored',required=True)
-parser.add_argument('-t','--duration', type = int, help = 'Duration of the spectrogram', required = True)
-parser.add_argument('-s','--SR', type = int, help = 'Sampling Rate of the spectrogram')
-args = parser.parse_args()
-
-annots = args.path_to_data
-
+def process_annotations(annotations_folder, duration, sr, outdir='.'):
+    """Convert YOLO .txt detections into a time/frequency table saved as .csv.
+
+    Every `.txt` file of `annotations_folder` is read as space-separated
+    `espece x y w h` rows. The number after the last `_` of the file name is
+    added to `x * duration` to place each detection in time, and `sr / 2` is
+    used as the top of the frequency axis. The table is written to
+    `<outdir>/YOLO_detection_<day>_<month>.csv`; nothing is written when the
+    folder holds no `.txt` file.
+
+    outdir: folder receiving the .csv (defaults to the current directory).
+    """
    today = date.today()
-out_file = str('YOLO_detection'+str('_'+str(today.day)+'_'+str(today.month)))
+    out_file = f'YOLO_detection_{today.day}_{today.month}'
+
+    df_list = []
+    names = []  # Add your class names here

-outdir = args.directory
+    for file_name in tqdm(os.listdir(annotations_folder)):
+        if file_name.endswith('.txt'):
+            file_path = os.path.join(annotations_folder, file_name)
+            annotation_df = pd.read_csv(file_path, sep=' ', names=['espece', 'x', 'y', 'w', 'h'])

-df = pd.concat({f:pd.read_csv(os.path.join(annots, f), sep=' ', names=['espece', 'x', 'y', 'w', 'h'])
-				for f in tqdm(os.listdir(annots))}, names=['file'])
+            annotation_df['file'] = file_name
+            annotation_df['idx'] = annotation_df['file'].str.split('_').str[-1].str.split('.').str[0]
+            # `n` must be passed by keyword: pandas 2 rejects it as a positional argument
+            annotation_df['file'] = annotation_df['file'].str.rsplit('.', n=1).str[0] + '.wav'

-df = df.reset_index(level=[0])
-df = df.reset_index()
-del df['index']
-df['idx'] = df.file.str.split('_').str[-1].str.split('.').str[0]
-df.file = df.file.str.rsplit('.',1).str[0]+'.wav'
+            # Fall back to the numeric class id while `names` has no entry for it
+            annotation_df['annot'] = annotation_df['espece'].apply(lambda x: names[x] if x < len(names) else str(x))

-DUREE_SPECTRO = args.duration
-SR = args.SR
+            annotation_df['midl'] = (annotation_df['x'] * duration) + annotation_df['idx'].astype(int)
+            annotation_df['freq_center'] = (1 - annotation_df['y']) * (sr / 2)
+            annotation_df['freq_min'] = annotation_df['freq_center'] - (annotation_df['h'] * (sr / 2)) / 2
+            annotation_df['freq_max'] = annotation_df['freq_center'] + (annotation_df['h'] * (sr / 2)) / 2
+            annotation_df['start'] = annotation_df['midl'] - (annotation_df['w'] * duration) / 2
+            annotation_df['stop'] = annotation_df['midl'] + (annotation_df['w'] * duration) / 2
+            annotation_df['duration'] = annotation_df['stop'] - annotation_df['start']

-#put the classes here
-names = []
+            df_list.append(annotation_df)

-df['annot'] = 'None'
-for j in range (len(df)):
-	df.loc[j,('annot')] = names[int(df.espece.iloc[j])]
+    if not df_list:
+        # pd.concat raises ValueError on an empty list
+        print(f'No .txt file found in {annotations_folder}')
+        return
+
+    result_df = pd.concat(df_list, ignore_index=True)
+    output_path = os.path.join(outdir, f'{out_file}.csv')
+    result_df.to_csv(output_path, index=False)
+    print(f'Saved as {output_path}')

-print('Calculating the positions','\n')
-df['midl'] = (df.x*DUREE_SPECTRO)+(df.idx.astype(int))
-df['freq_center'] = (1-df.y)*(SR/2)
-df['freq_min'] = df.freq_center - (df.h*(SR/2))/2
-df['freq_max'] = df.freq_center + (df.h*(SR/2))/2
-df['start'] = df.midl - (df.w * DUREE_SPECTRO)/2
-df['stop'] = df.midl + (df.w * DUREE_SPECTRO)/2
-df['duration'] = df.stop - df.start
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='TODO')
+    parser.add_argument('-p', '--path_to_data', type=arg_directory, help='Path of the folder that contains the .txt files', required=True)
+    parser.add_argument('-d', '--directory', type=arg_directory, help='Directory where the dataframe will be stored', required=True)
+    parser.add_argument('-t', '--duration', type=int, help='Duration of the spectrogram', required=True)
+    # Required: the frequency columns are computed from `sr / 2`
+    parser.add_argument('-s', '--SR', type=int, help='Sampling Rate of the spectrogram', required=True)
+    args = parser.parse_args()

-df.to_csv(os.path.join(outdir,str(out_file+'.csv')), index= False)
-print('saved as ',os.path.join(outdir,str(out_file+'.csv')))
+    process_annotations(args.path_to_data, args.duration, args.SR, args.directory)
--- a/get_train_annot_YOLO.py
+++ b/get_train_annot_YOLO.py
@@ -3,95 +3,41 @@ import pandas as pd
 import librosa
 import numpy as np
 import matplotlib.pyplot as plt
-from p_tqdm import p_map
-import ipdb
 import random
 from datetime import date
 import argparse
+from p_tqdm import p_map
 import cv2
-import matplotlib.patches as patches
-from matplotlib.patches import Rectangle
-from random import randrange
-from PIL import Image
 from mycolorpy import colorlist as mcp

-today = date.today()
-
 def arg_directory(path):
    if os.path.isdir(path):
        return path
    else:
        raise argparse.ArgumentTypeError(f'`{path}` is not a valid path')

-parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='TODO')
-parser.add_argument('-f','--filename_path', type= str, help = 'Path and name of the file containing the annotations',required=True)
-parser.add_argument('-p','--path_to_data', type=arg_directory, help = 'Path of the folder that contain the recordings',required=True)
-parser.add_argument('-d','--directory', type=arg_directory, help = 'Directory to wich spectrograms and .txt files will be stored',required=True)
-parser.add_argument('-m','--mode',type=str,choices=['uniform','personalized'],help = 'Choose the mode to calculate the y and height value',required=True)
-parser.add_argument('-u','--unique',type=str, choices=['unique','multiple'], help = 'unique for only one spectrogram per file, multple for multiple spectrogram',required=True)
-parser.add_argument('-c','--columns_name',type=str,help = 'Name of the column that contain the path',required=True)
-parser.add_argument('--export',type=str, default=None, help='To export the position of the bounding box on the spectrogram',required=False)
-args = parser.parse_args()
+def process_annotations(file_path, duration, mode, unique, columns_name, export):
+    today = date.today()

-directory = args.directory
+    df = pd.read_csv(file_path, low_memory=False)
+    df.rename(columns={'label': 'Code', 'annotation_initial_time': 'start', 'annotation_final_time': 'stop', 'duree': 'd_annot', 'min_frequency': 'min_freq', 'max_frequency': 'max_freq', 'avg_frequency': 'midl_y'}, inplace=True)
+    df['max_freq'].fillna(9000, inplace=True)
+    df['min_freq'].fillna(1000, inplace=True)
+    df['midl_y'].fillna(5000, inplace=True)

-DURATION = 8
    NB_CLASS = 5
-
-df = pd.read_csv(args.filename_path, low_memory=False)
-
-df.rename(columns={'label':'Code'},inplace=True)
-df.rename(columns={'annotation_initial_time':'start'},inplace=True)
-df.rename(columns={'annotation_final_time':'stop'},inplace=True)
-df.rename(columns={'duree':'d_annot'},inplace=True)
-df.rename(columns={'min_frequency':'min_freq'},inplace=True)
-df.rename(columns={'max_frequency':'max_freq'},inplace=True)
-df.rename(columns={'avg_frequency':'midl_y'},inplace=True)
-
    tab = df.groupby('Code').count()
    tab = tab.sort_values(tab.columns[0], ascending=False)[:NB_CLASS]
    df = df[df.Code.isin(tab.index)]
-
-try :
-    df['max_freq'].fillna(9000,inplace = True)
-    df['min_freq'].fillna(1000,inplace = True)
-except Exception:
-    df['max_freq'] = 9000
-    df['min_freq'] = 1000
-    df['midl_y'] = 5000
-
    df['d_annot'] = df.stop - df.start
    df['midl'] = (df.stop + df.start) / 2
-df['Path'] = df[args.columns_name]
+    df['Path'] = df[columns_name]

    df = df[df.d_annot < 8]
    df = df.reset_index()

-list_espece = df.groupby('Code').count().sort_values(df.columns[0],ascending = False)
-data = pd.DataFrame(columns = ['espece','ind'])
-
-for i in range (len(list_espece)):
-    esp = list_espece.index[i]
-    new_col = pd.DataFrame([[esp,i]],columns = ['espece','ind'])
-    data = pd.concat([data,new_col])
-
-liste_espece = data.espece
-liste_espece.to_csv(str(directory+'liste_especes.csv'),index = False)
-
-print('\n',data)
-
-#color = mcp.gen_color(cmap = "Wistia", n= len(list_espece))
-
-colors = pd.DataFrame(columns = ['color', 'species'])
-
-for i in range (30):
-    r = randrange(255)
-    g = randrange(255)
-    b = randrange(255)
-    rand_color = (r, g, b)
-    new = pd.DataFrame([[rand_color, i]], columns = ['color', 'species'])
-    colors = pd.concat([colors, new])
-
+    # Add your class names to the 'colors' list
+    colors = [(random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)) for _ in range(30)]

    def process(x):
        count, (f, grp) = x
@@ -99,29 +45,27 @@ def process(x):
        duration = DURATION

        while len(grp) != 0:
-
            tab = grp[grp.midl <= grp.start.iloc[0] + 7]
            fin = pd.DataFrame(columns=['id', 'x', 'y', 'width', 'height'])
            duree = tab.d_annot

            if len(tab) == 0:
                tab = grp
-            print(tab)
+
            rd = round(random.uniform(-1.5, 1.5), 2)

-        if args.unique == 'multiple':
+            if unique == 'multiple':
                if tab.start.iloc[0] <= 3:
                    offset = 0
-            if tab.start.iloc[0] == 3:
+                elif tab.start.iloc[0] == 3:
                    offset = 1
-            if tab.start.iloc[0] >= 3:
+                elif tab.start.iloc[0] >= 3:
                    offset = tab.midl.iloc[0] - 3.5 + rd
-        elif args.unique == 'unique':
+            elif unique == 'unique':
                offset = 0

            window_size = 1024
            window = np.hanning(window_size)
-
            y, sr = librosa.load(filename, offset=offset, duration=duration, sr=None)
            stft = librosa.core.spectrum.stft(y, n_fft=window_size, hop_length=512, window=window)
            spectrum, freq, time, im = plt.specgram(y, Fs=sr, NFFT=window_size, noverlap=512, cmap='jet')
@@ -130,7 +74,7 @@ def process(x):
            vmax = np.flipud(np.log10(np.abs(stft))).max()

            plt.close()
-        plt.imshow(np.flipud(np.log10(np.abs(stft))),aspect = "auto", interpolation = None, cmap = 'jet', vmin = vmin, vmax = vmax)
+            plt.imshow(np.flipud(np.log10(np.abs(stft)), aspect='auto', interpolation=None, cmap='jet', vmin=vmin, vmax=vmax))
            plt.subplots_adjust(top=1, bottom=0, left=0, right=1)

            for idxs, row in tab.iterrows():
@@ -138,22 +82,21 @@ def process(x):
                x_pxl = (row.midl - offset) / duration
                width_pxl = (row.stop - row.start) / duration

-            if args.mode == 'uniform':
+                if mode == 'uniform':
                    height_pxl = 0.8
                    y_pxl = 0.5
-
                else:
                    y_pxl = 1 - (row.midl_y / (sr / 2))
-
                    height_pxl = (row.max_freq - row.min_freq) / (sr / 2)
                    if height_pxl > 1:
                        height_pxl = 1
                    elif height_pxl > y_pxl * 2:
                        y_pxl = y_pxl + 0.5 * (height_pxl - y_pxl * 2)

-            annotation = pd.DataFrame([[str(data.loc[data.espece == row.Code,'ind'][0]),x_pxl,y_pxl,width_pxl,height_pxl]],columns = ['id','x', 'y', 'width', 'height'])
-
+                annotation = pd.DataFrame([[str(data.loc[data.espece == row.Code, 'ind'][0]), x_pxl, y_pxl, width_pxl, height_pxl]],
+                                         columns=['id', 'x', 'y', 'width', 'height'])
                fin = pd.concat([fin, annotation])
+
            grp = grp.drop(tab.index)

            name = str(row.Path.replace('/', '_').replace('.', '_') + '_' + str(count))
@@ -162,64 +105,65 @@ def process(x):
            try:
                plt.savefig(os.path.join(directory, str('images_' + str(today.day) + '_' + str(today.month)), row.Code, str(name + '.jpg')))
                fin.to_csv(name_file, sep=' ', header=False, index=False)
-            plt.savefig(os.path.join(directory,str('images_'+str(today.day)+'_'+str(today.month)),'all',str(name+'.jpg')))
-
+                plt.savefig(os.path.join(directory, str('images_' + str(today.day) + '_' + str(today.month)), 'all',
+                                    str(name + '.jpg')))
            except:
                os.mkdir(os.path.join(directory, str('images_' + str(today.day) + '_' + str(today.month))))
                for especes in list_espece.index:
-
-                os.mkdir(os.path.join(directory,str('images_'+str(today.day)+'_'+str(today.month)),especes))
-            os.mkdir(os.path.join(directory,str('images_'+str(today.day)+'_'+str(today.month)),'all'))
+                    os.mkdir(os.path.join(directory, str('images_' + str(today.day) + '_' + str(today.month), especes)))
+                os.mkdir(os.path.join(directory, str('images_' + str(today.day) + '_' + str(today.month), 'all')))
                os.mkdir(os.path.join(directory, str('labels_' + str(today.day) + '_' + str(today.month))))
-
                fin.to_csv(name_file, sep=' ', header=False, index=False)
-            plt.savefig(os.path.join(directory,str('images_'+str(today.day)+'_'+str(today.month)),row.Code,str(name+'.jpg')))
-            plt.savefig(os.path.join(directory,str('images_'+str(today.day)+'_'+str(today.month)),'all',str(name+'.jpg')))
+                plt.savefig(os.path.join(directory, str('images_' + str(today.day) + '_' + str(today.month)), row.Code,
+                                    str(name + '.jpg')))
+                plt.savefig(os.path.join(directory, str('images_' + str(today.day) + '_' + str(today.month)), 'all',
+                                    str(name + '.jpg')))

            plt.close()

-        if args.export != None:
-
-            im = cv2.imread(os.path.join(directory,str('images_'+str(today.day)+'_'+str(today.month)),'all',str(name+'.jpg')))
+            if export:
+                im = cv2.imread(
+                    os.path.join(directory, str('images_' + str(today.day) + '_' + str(today.month)), 'all', str(name + '.jpg')))
                im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
                H, W = im.shape[0], im.shape[1]
                for l in range(len(fin)):
-
                    x, y, w, h = fin.x.iloc[l] * W, fin.y.iloc[l] * H, fin.width.iloc[l] * W, fin.height.iloc[l] * H
-
-                shape1 = (int(x-(0.5*w)), int(y+(0.5*h)))
-                shape2 = (int(x+(0.5*w)), int(y+(0.5*h)))
-                shape3 = (int(x+-(0.5*w)), int(y-(0.5*h)))
-                shape4 = (int(x+(0.5*w)), int(y-(0.5*h)))
-
-                #rectangle text shape
-
+                    shape1 = (int(x - 0.5 * w), int(y + 0.5 * h))
+                    shape2 = (int(x + 0.5 * w), int(y + 0.5 * h))
+                    shape3 = (int(x - 0.5 * w), int(y - 0.5 * h))
+                    shape4 = (int(x + 0.5 * w), int(y - 0.5 * h)
+                              )
                    shp1 = shape4[0] - 10, shape4[1] + 20
                    shp2 = shape4[0], shape4[1] + 20
                    shp3 = shape4[0] - 10, shape4[1]
                    shp4 = shape4[0], shape4[1]
-
-                #text placement
-
                    text_shape = shp1[0], shp1[1] - 5
-
                    label = str(fin.id.iloc[l])
                    cv2.rectangle(im, pt1=shape1, pt2=shape4, color=colors[colors.species == label].color, thickness=1)
-
                    cv2.rectangle(im, pt1=shp1, pt2=shp4, color=colors[colors.species == label].color, thickness=-1)
                    cv2.putText(im, label, text_shape, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
-                
                plt.imshow(im)
                plt.subplots_adjust(top=1, bottom=0, left=0, right=1)
                try:
-                plt.savefig(os.path.join(directory, str('images_annotes_'+str(today.day)+'_'+str(today.month)),str(name+'.jpg')))
-
+                    plt.savefig(
+                        os.path.join(directory, str('images_annotes_' + str(today.day) + '_' + str(today.month), str(name + '.jpg'))))
                except Exception:
                    os.mkdir(os.path.join(directory, str('images_annotes_' + str(today.day) + '_' + str(today.month))))
-                plt.savefig(os.path.join(directory, str('images_annotes_'+str(today.day)+'_'+str(today.month)),str(name+'.jpg')))
-
+                    plt.savefig(os.path.join(directory, str('images_annotes_' + str(today.day) + '_' + str(today.month), str(name + '.jpg'))))
                plt.close()

    p_map(process, enumerate(df.groupby('Path')), num_cpus=2, total=len(df.groupby('Path')))
-
    print('saved to', directory)
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='TODO')
+    parser.add_argument('-f', '--filename_path', type=str, help='Path and name of the file containing the annotations', required=True)
+    parser.add_argument('-p', '--path_to_data', type=arg_directory, help='Path of the folder that contains the recordings', required=True)
+    parser.add_argument('-d', '--directory', type=arg_directory, help='Directory to which spectrograms and .txt files will be stored', required=True)
+    parser.add_argument('-m', '--mode', type=str, choices=['uniform', 'personalized'], help='Choose the mode to calculate the y and height value', required=True)
+    parser.add_argument('-u', '--unique', type=str, choices=['unique', 'multiple'], help='unique for only one spectrogram per file, multiple for multiple spectrograms', required=True)
+    parser.add_argument('-c', '--columns_name', type=str, help='Name of the column that contains the path', required=True)
+    parser.add_argument('--export', type=str, default=None, help='To export the position of the bounding box on the spectrogram', required=False)
+    args = parser.parse_args()
+
+    process_annotations(args.filename_path, duration=8, mode=args.mode, unique=args.unique, columns_name=args.columns_name, export=args.export)
--- a/get_train_val_YOLO.py
+++ b/get_train_val_YOLO.py
@@ -5,7 +5,6 @@ from tqdm import tqdm
 import numpy as np
 import shutil
 import argparse
-import ipdb

 def arg_directory(path):
    if os.path.isdir(path):
@@ -13,12 +12,17 @@ def arg_directory(path):
    else:
        raise argparse.ArgumentTypeError(f'`{path}` is not a valid path')

-parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='TODO')
-parser.add_argument('-r','--ratio', type=float, default = 0.7,help = 'Train Ratio (val = 1 - ratio)')
-parser.add_argument('-p','--path_to_data', type=arg_directory, help = 'Path of the folder that contain the .txt (ending with labels/)',required=True)
-parser.add_argument('-d','--direction', type=arg_directory, help = 'Directory to wich spectrogram and .txt files will be stored (different from -p)',required=True)
-args = parser.parse_args()
+def create_directory_if_not_exists(directory):
+    """Create `directory`, including missing parents; do nothing if it already exists.
+
+    Raises FileExistsError when the path exists but is not a directory.
+    """
+    # A single call avoids the gap between an existence check and the creation
+    os.makedirs(directory, exist_ok=True)
+
+def copy_files_to_directory(file_list, source_dir, destination_dir, extensions=('.txt', '.jpg')):
+    """Copy the file of each name in `file_list` from `source_dir` to `destination_dir`.
+
+    `file_list` holds file names without extension. For each name the
+    extensions are tried in order and the first existing file is copied under
+    the same name, so a folder of `.txt` files is handled as before and a
+    folder holding only `.jpg` files is copied too.
+
+    Raises FileNotFoundError when no candidate exists for a name.
+    """
+    for file_name in file_list:
+        for extension in extensions:
+            source_path = os.path.join(source_dir, f'{file_name}{extension}')
+            if os.path.exists(source_path):
+                destination_path = os.path.join(destination_dir, f'{file_name}{extension}')
+                shutil.copy2(source_path, destination_path)
+                break
+        else:
+            # No extension matched: fail as the plain copy of a missing file would
+            raise FileNotFoundError(os.path.join(source_dir, f'{file_name}{extensions[0]}'))

+def process_data(args):
    path = args.path_to_data
    direction = args.direction

@@ -60,36 +64,35 @@ train = pd.DataFrame(df.groupby('file').count().index,columns = ['file'])
    val.file = val.file.str.rsplit('.', 1).str[0]
    train.file = train.file.str.rsplit('.', 1).str[0]

-isExist = os.path.exists(os.path.join(direction,'images'))
-
-if not isExist:
-	os.mkdir(os.path.join(direction,'images'))
-	os.mkdir(os.path.join(direction,'images/train'))
-	os.mkdir(os.path.join(direction,'images/val'))
-	os.mkdir(os.path.join(direction,'labels'))
-	os.mkdir(os.path.join(direction,'labels/train'))
-	os.mkdir(os.path.join(direction,'labels/val'))
-
-for i,row in tqdm(val.iterrows(), total=val.shape[0]):
-
-	shutil.copy2(os.path.join(path, str(row.file+'.txt')), os.path.join(direction, str('labels/val/'+row.file+'.txt')))
-	shutil.copy2(os.path.join(path, str('../images/all/'+row.file+'.jpg')), os.path.join(direction, str('images/val/'+row.file+'.jpg')))
+    create_directory_if_not_exists(os.path.join(direction, 'images'))
+    create_directory_if_not_exists(os.path.join(direction, 'images/train'))
+    create_directory_if_not_exists(os.path.join(direction, 'images/val'))
+    create_directory_if_not_exists(os.path.join(direction, 'labels'))
+    create_directory_if_not_exists(os.path.join(direction, 'labels/train'))
+    create_directory_if_not_exists(os.path.join(direction, 'labels/val'))

-for i,row in tqdm(train.iterrows(), total=train.shape[0]):
+    copy_files_to_directory(val.file, path, os.path.join(direction, 'labels/val'))
+    copy_files_to_directory(val.file, os.path.join(path, '../images/all'), os.path.join(direction, 'images/val'))

-	shutil.copy2(os.path.join(path, str(row.file+'.txt')), os.path.join(direction, str('labels/train/'+row.file+'.txt')))
-	shutil.copy2(os.path.join(path, str('../images/all/'+row.file+'.jpg')), os.path.join(direction, str('images/train/'+row.file+'.jpg')))
+    copy_files_to_directory(train.file, path, os.path.join(direction, 'labels/train'))
+    copy_files_to_directory(train.file, os.path.join(path, '../images/all'), os.path.join(direction, 'images/train'))

    try:
-	liste_espece = pd.read_csv(str(path+'../liste_especes.csv'))
+        liste_espece = pd.read_csv(os.path.join(path, '../liste_especes.csv'))
    except Exception:
-	print('No species list detected, please add to ',str(direction+'custom_data.yaml'))
+        print('No species list detected, please add it to', os.path.join(direction, 'custom_data.yaml'))

-with open(str(direction+'custom_data.yaml'),'w') as f:
-	f.write('train : '+direction+'images/train\n')
-	f.write('val : '+direction+'images/val\n')
-	f.write('nc : ' + str(len(liste_espece))+'\n')
-	try:
-		f.write('names : ' + str(liste_espece.espece.tolist()))
-	except Exception:
-		print('.yaml saved to ',str(direction+'custom_data.yaml'))
\ No newline at end of file
+    with open(os.path.join(direction, 'custom_data.yaml'), 'w') as f:
+        f.write(f'train: {os.path.join(direction, "images/train")}\n')
+        f.write(f'val: {os.path.join(direction, "images/val")}\n')
+        f.write(f'nc: {len(liste_espece)}\n')
+        f.write(f'names: {liste_espece.espece.tolist()}')
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='TODO')
+    parser.add_argument('-r', '--ratio', type=float, default=0.7, help='Train Ratio (val = 1 - ratio)')
+    parser.add_argument('-p', '--path_to_data', type=arg_directory, help='Path of the folder that contains the .txt (ending with labels/)', required=True)
+    parser.add_argument('-d', '--direction', type=arg_directory, help='Directory to which spectrogram and .txt files will be stored (different from -p)', required=True)
+    args = parser.parse_args()
+
+    process_data(args)
--- a/get_yolo_detection.py
+++ b/get_yolo_detection.py
 import pandas as pd
 import os
-import ipdb
 from tqdm import tqdm
 import argparse
 from datetime import date
@@ -11,18 +10,17 @@ def arg_directory(path):
    else:
        raise argparse.ArgumentTypeError(f'`{path}` is not a valid path')

-parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='TODO')
-parser.add_argument('-p','--path_to_data', type=arg_directory, help = 'Path of the folder that contain the .txt files',required=True)
-parser.add_argument('-d','--direction', type=arg_directory, help = 'Directory to wich the dataframe will be stored',required=True)
-args = parser.parse_args()
-
+def process_data(args):
    annots = args.path_to_data
-
    today = date.today()
-out_file = str('YOLO_detection_'+str('_'+str(today.day)+'_'+str(today.month)))
-
+    out_file = f'YOLO_detection_{today.day}_{today.month}'
    outdir = args.direction

+    # Define constants
+    DUREE_SPECTRO = 8
+    OVERLAP = 2
+
+    # Load and process data
    df = pd.concat({f: pd.read_csv(os.path.join(annots, f), sep=' ', names=['espece', 'x', 'y', 'w', 'h', 'conf'])
                    for f in tqdm(os.listdir(annots))}, names=['file'])

@@ -32,15 +30,19 @@ del df['index']
    df['idx'] = df.file.str.split('_').str[-1].str.split('.').str[0]
    df.file = df.file.str.rsplit('.', 1).str[0] + '.wav'

-DUREE_SPECTRO = 8
-OVERLAP = 2
-
-#put the classes here
+    # Define class names
    names = []

-df['annot'] = 'None'
-for j in range (len(df)):
-	df['annot'].iloc[j] = names[int(df.espece.iloc[j])]
+    df['annot'] = df['espece'].apply(lambda x: names[int(x)])
+
+    # Save the processed DataFrame to a CSV file
+    df.to_csv(os.path.join(outdir, f'{out_file}.csv'), index=False)
+    print(f'Saved as {os.path.join(outdir, f"{out_file}.csv")}')
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='TODO')
+    parser.add_argument('-p', '--path_to_data', type=arg_directory, help='Path of the folder that contains the .txt files', required=True)
+    parser.add_argument('-d', '--direction', type=arg_directory, help='Directory to which the dataframe will be stored', required=True)
+    args = parser.parse_args()

-df.to_csv(os.path.join(outdir,str(out_file+'.csv')), index= False)
-print('saved as ',os.path.join(outdir,str(out_file+'.csv')))
+    process_data(args)
--- a/labelme2yolo.py
+++ b/labelme2yolo.py
 import os
 import json
-import random
 import base64
 import shutil
 import argparse
-from pathlib import Path
 from glob import glob
+from pathlib import Path

 def arg_directory(path):
    if os.path.isdir(path):
@@ -13,55 +12,61 @@ def arg_directory(path):
    else:
        raise argparse.ArgumentTypeError(f'`{path}` is not a valid path')

-parser = argparse.ArgumentParser(description='Convert LabelMe annotations to YOLO compatible')
-parser.add_argument('-p','--path_to_data',type=arg_directory,help='Path to LabelMe annotations')
-parser.add_argument('-d','--directory',type=arg_directory,help='Directory to which YOLO annotations will be stored')
-args = parser.parse_args()
-
-# YOLO metadata and files
-yolo_names = []
-yolo_names_path = os.path.join(args.directory, 'custom.names')
-
-# Convert image annotations
-
-for index, labelme_annotation_path in enumerate(glob(f'{args.path_to_data}/*.json')):
-    image_id = os.path.basename(labelme_annotation_path).rstrip('.json')
-
-    labelme_annotation_file = open(labelme_annotation_path, 'r')
+def convert_labelme_to_yolo(labelme_annotation_path, yolo_directory):
+    # Load LabelMe annotation
+    image_id = Path(labelme_annotation_path).stem
+    with open(labelme_annotation_path, 'r') as labelme_annotation_file:
        labelme_annotation = json.load(labelme_annotation_file)

-    yolo_annotation_path = os.path.join(args.directory,'labels', image_id + '.txt')
-    yolo_annotation_file = open(yolo_annotation_path, 'w')
-    yolo_image = base64.decodebytes(labelme_annotation['imageData'].encode())
-    yolo_image_path = os.path.join(args.directory, 'labels', image_id + '.jpg')
+    # YOLO annotation and image paths
+    yolo_annotation_path = os.path.join(yolo_directory, 'labels', f'{image_id}.txt')
+    yolo_image_path = os.path.join(yolo_directory, 'images', f'{image_id}.jpg')
+
+    with open(yolo_annotation_path, 'w') as yolo_annotation_file:
+        yolo_image_data = base64.b64decode(labelme_annotation['imageData'])

-    # Write YOLO image (and it to the list)
-    yolo_image_file = open(yolo_image_path, 'wb')
-    yolo_image_file.write(yolo_image)
-    yolo_image_file.close()
+        # Write YOLO image
+        with open(yolo_image_path, 'wb') as yolo_image_file:
+            yolo_image_file.write(yolo_image_data)

        # Write YOLO image annotation
        for shape in labelme_annotation['shapes']:
            if shape['shape_type'] != 'rectangle':
-            print(
-                f'Invalid type `{shape["shape_type"]}` in annotation `annotation_path`')
+                print(f'Invalid type `{shape["shape_type"]}` in annotation `{labelme_annotation_path}`')
                continue
-        if shape['label'] not in yolo_names:
-            yolo_names.append(shape['label'])

-        points = shape['points']
-        scale_width = 1.0 / labelme_annotation['imageWidth']
-        scale_height = 1.0 / labelme_annotation['imageHeight']
-        width = abs(points[1][0] - points[0][0]) * scale_width
-        height = abs(points[1][1] - points[0][1]) * scale_height
-        x = (abs(points[1][0] + points[0][0]) / 2) * scale_width
-        y = (abs(points[1][1] + points[0][1]) / 2) * scale_height
+            label = shape['label']
+            x1, y1 = shape['points'][0]
+            x2, y2 = shape['points'][1]
+            width = x2 - x1
+            height = y2 - y1
+            x_center = (x1 + x2) / 2
+            y_center = (y1 + y2) / 2

-        object_class = shape['label']
-        yolo_annotation_file.write(f'{object_class} {x} {y} {width} {height}\n')
+            annotation_line = f'{label} {x_center} {y_center} {width} {height}\n'
+            yolo_annotation_file.write(annotation_line)
+
+def main(args):
+    yolo_names = set()
+
+    for labelme_annotation_path in glob(f'{args.path_to_data}/*.json'):
+        convert_labelme_to_yolo(labelme_annotation_path, args.directory)
+
+        with open(labelme_annotation_path, 'r') as labelme_annotation_file:
+            labelme_annotation = json.load(labelme_annotation_file)
+
+            for shape in labelme_annotation['shapes']:
+                yolo_names.add(shape['label'])

    # Write YOLO names
-yolo_names_file = open(yolo_names_path, 'w')
-yolo_names_file.write(os.linesep.join(yolo_names))
-yolo_names_file.close()
+    yolo_names_path = os.path.join(args.directory, 'custom.names')
+    with open(yolo_names_path, 'w') as yolo_names_file:
+        yolo_names_file.write('\n'.join(yolo_names))
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Convert LabelMe annotations to YOLO compatible')
+    parser.add_argument('-p', '--path_to_data', type=arg_directory, help='Path to LabelMe annotations', required=True)
+    parser.add_argument('-d', '--directory', type=arg_directory, help='Directory to which YOLO annotations will be stored', required=True)
+    args = parser.parse_args()

+    main(args)