diff --git a/README.md b/README.md
index 9cae1d1260f10db240a30eb62e6c43f5792fa2a0..93a68b9975338518a3a3c87e411a1f34a9975186 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,5 @@
 # YOLO-DYNI
-
-
 This repository was created to make getting started with YOLOv5 easier. It notably contains a script to extract spectrograms from a set of recordings ([get_spectrogram.py](https://gitlab.lis-lab.fr/stephane.chavin/yolo-dyni/-/blob/main/get_spectrogram.py)), a script to convert LabelMe annotations to the YOLO format ([labelme2yolo.py](https://gitlab.lis-lab.fr/stephane.chavin/yolo-dyni/-/blob/main/labelme2yolo.py)), a script to convert annotations from a dataframe to the YOLO format ([get_train_annot_YOLO.py](https://gitlab.lis-lab.fr/stephane.chavin/yolo-dyni/-/blob/main/get_train_annot_YOLO.py/)), a script to split the training and validation sets in a balanced way ([get_train_val_YOLO.py](https://gitlab.lis-lab.fr/stephane.chavin/yolo-dyni/-/blob/main/get_train_val_YOLO.py)) and a script to compile the detections of a trained model into a dataframe ([get_yolo_detection.py](https://gitlab.lis-lab.fr/stephane.chavin/yolo-dyni/-/blob/main/get_yolo_detection.py)).
diff --git a/get_json_file_YOLO.py b/get_json_file_YOLO.py
index d95216bebb1cbc7d28fceb9b37c260b2ea66409d..1862034b91ca326e2fad214151b87334775b8ca3 100644
--- a/get_json_file_YOLO.py
+++ b/get_json_file_YOLO.py
@@ -11,41 +11,43 @@ def arg_directory(path):
     else:
         raise argparse.ArgumentTypeError(f'`{path}` is not a valid path')
 
-parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='TODO')
-parser.add_argument('-p','--path_to_json', type=arg_directory, help = 'Path of the folder that contain the .json',required=True)
-parser.add_argument('-i','--path_to_img', type=arg_directory, help = 'Path of the folder that contain the .jpg',required=True)
-parser.add_argument('-d','--directory', type=arg_directory, help = 'Directory to wich modified .json files will be stored',required=True)
-args = parser.parse_args()
-
-filename = args.path_to_json
-out_file = args.directory
-img_path = args.path_to_img
-
-liste_file = os.listdir(filename)
-liste_file = pd.DataFrame(liste_file, columns =['fn'])
-liste_file['type'] = liste_file.fn.str.split('.').str[-1]
-liste_file = liste_file[liste_file.type == 'json']
-liste_file.reset_index(inplace = True)
-
-for i in range (len(liste_file)):
-    if liste_file.fn[i][0] == '.':
-        liste_file = liste_file.drop(i)
-
-liste_file = liste_file.reset_index()
-
-for i, row in liste_file.iterrows():
-    if len(row.fn) > 30:
-        data = labelme.LabelFile.load_image_file(os.path.join(img_path,str(row.fn[:-4]+'jpg')))
-        image_data = base64.b64encode(data).decode('utf-8')
-    else:
-        continue
-    try :
-        len(data)
-    except TypeError:
-        continue
-    f = open(filename+row.fn,)
-    get_data = json.load(f)
-    get_data['imageData'] = image_data
-    get_data['imagePath'] = img_path+row.fn[:-4]+'jpg'
-    with open(out_file+row.fn, 'w') as f:
-        json.dump(get_data, f, indent=4)
+def process_json_files(json_dir, img_dir, output_dir):
+    json_files = [f for f in os.listdir(json_dir) if f.endswith('.json')]
+
+    for json_file in json_files:
+        if json_file.startswith('.'):
+            continue
+
+        json_path = os.path.join(json_dir, json_file)
+        img_path = os.path.join(img_dir, json_file.replace('.json', '.jpg'))
+
+        if not os.path.exists(img_path):
+            continue
+
+        with open(img_path, 'rb') as img_file:
+            image_data = base64.b64encode(img_file.read()).decode('utf-8')
+
+        with open(json_path, 'r') as f:
+            json_data = json.load(f)
+
+        json_data['imageData'] = image_data
+        json_data['imagePath'] = img_path
+
+        output_path = os.path.join(output_dir, json_file)
+        with open(output_path, 'w') as f:
+            json.dump(json_data, f, indent=4)
+
+def main():
+    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='TODO')
+    parser.add_argument('-p', '--path_to_json', type=arg_directory, help='Path to the folder containing the .json files', required=True)
+    parser.add_argument('-i', '--path_to_img', type=arg_directory, help='Path to the folder containing the .jpg images', required=True)
+    parser.add_argument('-d', '--directory', type=arg_directory, help='Directory to which modified .json files will be stored', required=True)
+    args = parser.parse_args()
+
+    process_json_files(args.path_to_json, args.path_to_img, args.directory)
+
+if __name__ == "__main__":
+    main()
diff --git a/get_spectrogram.py b/get_spectrogram.py
index 92dfed345e4757b4a2c7d08302998dff745fdee8..be398af994d16372a2fabe028b262de1b3209bc2 100644
--- a/get_spectrogram.py
+++ b/get_spectrogram.py
@@ -1,13 +1,12 @@
 import os
 import librosa
-import ipdb
 import glob
 import argparse
 import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
+from functools import partial
 from p_tqdm import p_map
-from tqdm import tqdm
 import warnings
 
 warnings.filterwarnings('ignore')
@@ -18,69 +16,65 @@ def arg_directory(path):
     else:
         raise argparse.ArgumentTypeError(f'`{path}` is not a valid path')
 
-parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='TODO')
-parser.add_argument('-f','--file', type=str,help = 'Name of the file that contain the recording to print')
-parser.add_argument('-p','--path_to_data', type=arg_directory, help = 'Path of the folder that contain the recordings')
-parser.add_argument('-d','--directory', type=arg_directory, help = 'Directory to wich spectrogram will be stored')
-parser.add_argument('-m','--mode', type=str,choices=['unique','multiple'], help = 'Direction of the saved spectrogram')
-parser.add_argument('-n','--columns_name', type=str, help = 'Name of the columns that contain the path of the .wav')
-parser.add_argument('-i','--input', type=str, choices=['file','folder'], help = 'Choose "file" if you have a .csv file or "folder" to export spectrogram from all the .wav of a folder')
-args = parser.parse_args()
+def create_spectrogram(y, directory, filename, offset):
+    window_size = 1024
+    window = np.hanning(window_size)
+    stft = librosa.core.spectrum.stft(y, n_fft=window_size, hop_length=512, window=window)
 
-path_to_data = args.path_to_data
-direction = args.directory
-folder = 'Spectrogram/'
+    plt.close()
+    plt.figure()
 
-#PARAMETERS
-DURATION = 8
-OVERLAP = 2
+    log_stft = np.log10(np.abs(stft))
+    vmin, vmax = log_stft.min(), log_stft.max()
 
-if args.mode == 'multiple':
-    NB_IMG_PER_REC = 30
-elif args.mode == 'unique':
-    NB_IMG_PER_REC = 1
+    plt.imshow(log_stft[::-1], aspect="auto", interpolation=None, cmap='jet', vmin=vmin, vmax=vmax)
+    plt.subplots_adjust(top=1, bottom=0, left=0, right=1)
 
-if args.input == 'file':
-    df = pd.read_csv(args.file,low_memory=False)
-    df['Path'] = df[args.columns_name]
-elif args.input == 'folder':
-    df = pd.DataFrame(glob.glob(os.path.join(path_to_data,'*')),columns = ['Path'])
-
-def process(x):
-    _, (i) = x
-
-    for count, j in enumerate(range (NB_IMG_PER_REC)): # 30 images x (8 s - 2 s overlap) = 180 s shown over 30 images: only the first 3 minutes of a recording are displayed
+    name = os.path.join(directory, 'Spectrogram', f"{filename.replace('/', '_').split('.')[0]}_{offset}")
+    try:
+        plt.savefig(name + '.jpg')
+    except FileNotFoundError:
+        os.makedirs(os.path.join(directory, 'Spectrogram'), exist_ok=True)
+        plt.savefig(name + '.jpg')
+
+def process_recordings(entry, directory, img_per_rec):
+    _, (i) = entry
+    duration = 8
+    overlap = 2
+
+    for count in range(img_per_rec):
+        offset = count * (duration - overlap)
         filename = str(i[0])
-        offset = count * (DURATION - OVERLAP)
+
         try:
-            y, sr = librosa.load(filename, offset = offset, duration = DURATION, sr = None)
+            y, _ = librosa.load(filename, offset=offset, duration=duration, sr=None)
+            create_spectrogram(y, directory, filename, offset)
         except Exception:
             print(filename)
-            continue
-        window_size = 1024
-        window = np.hanning(window_size)
-        stft = librosa.core.spectrum.stft(y, n_fft=window_size, hop_length=512, window=window)
-
-        plt.close()
-        plt.figure()
-
-        vmin = np.flipud(np.log10(np.abs(stft))).mean()
-        vmax = np.flipud(np.log10(np.abs(stft))).max()
-
-        plt.imshow(np.flipud(np.log10(np.abs(stft))),aspect = "auto",interpolation = None,cmap = 'jet',vmin = vmin,vmax = vmax)
-
-        plt.subplots_adjust(top=1, bottom=0, left=0, right=1)
-
-        name = str(i[0].replace('/','_').split('.')[0]+'_'+str(offset)) #count
-        try :
-            plt.savefig(os.path.join(direction,folder, str(name+'.jpg')))
-
-        except FileNotFoundError:
-            print('creating the directory : ',os.path.join(direction,folder))
-            os.mkdir(os.path.join(direction,folder))
-            plt.savefig(os.path.join(direction,folder, str(name+'.jpg')))
-
-p_map(process, enumerate(df.groupby('Path')), num_cpus=1, total = len(df.groupby('Path')))
-print('saved to ',str(directory+folder))
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='TODO')
+    parser.add_argument('-f', '--file', type=str, help='Name of the .csv file that contains the paths of the recordings (with --input file)')
+    parser.add_argument('-p', '--path_to_data', type=arg_directory, help='Path of the folder that contains the recordings', required=True)
+    parser.add_argument('-d', '--directory', type=arg_directory, help='Directory to which spectrograms will be stored', required=True)
+    parser.add_argument('-m', '--mode', type=str, choices=['unique', 'multiple'], default='unique', help='"unique" saves one spectrogram per recording, "multiple" saves several')
+    parser.add_argument('-n', '--columns_name', type=str, help='Name of the column that contains the path of the .wav')
+    parser.add_argument('-i', '--input', type=str, choices=['file', 'folder'], help='Choose "file" if you have a .csv file or "folder" to export spectrograms from all the .wav of a folder')
+    args = parser.parse_args()
+
+    if args.mode == 'multiple':
+        img_per_rec = 30
+    else:
+        img_per_rec = 1
+
+    if args.input == 'file':
+        df = pd.read_csv(args.file, low_memory=False)
+        df['Path'] = df[args.columns_name]
+    elif args.input == 'folder':
+        df = pd.DataFrame(glob.glob(os.path.join(args.path_to_data, '*')), columns=['Path'])
+
+    p_map(partial(process_recordings, directory=args.directory, img_per_rec=img_per_rec),
+          enumerate(df.groupby('Path')), num_cpus=1, total=len(df.groupby('Path')))
+    print(f'Saved to {os.path.join(args.directory, "Spectrogram")}')
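Note on the `p_map` call above: Python's `enumerate()` accepts no `img_per_rec` keyword, which is why the extra parameters are bound with `functools.partial` before the worker is handed to `p_map`. A minimal sketch of the pattern, with a toy `worker` and made-up values rather than the repository's actual data:

```python
from functools import partial
from p_tqdm import p_map

def worker(entry, directory, img_per_rec):
    # entry is one (index, (path, group)) tuple, as produced by enumerate(df.groupby('Path'))
    idx, (path, _group) = entry
    return f'{directory}/{path}_{idx}_{img_per_rec}'

items = list(enumerate([('a.wav', None), ('b.wav', None)]))
# Bind the fixed arguments first, then map over the iterable only
results = p_map(partial(worker, directory='out', img_per_rec=30), items, num_cpus=1)
```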
diff --git a/get_time_freq_detection.py b/get_time_freq_detection.py
index c71d873612243dbefaa2d4255e78f35e69a4e891..ea709d078ba7f593f5fc4e84df1c077286312d7a 100644
--- a/get_time_freq_detection.py
+++ b/get_time_freq_detection.py
@@ -1,9 +1,8 @@
 import pandas as pd
 import os
-import ipdb
-from tqdm import tqdm
 import argparse
 from datetime import date
+from tqdm import tqdm
 
 def arg_directory(path):
     if os.path.isdir(path):
@@ -11,47 +10,44 @@ def arg_directory(path):
     else:
         raise argparse.ArgumentTypeError(f'`{path}` is not a valid path')
 
-parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='TODO')
-parser.add_argument('-p','--path_to_data', type=arg_directory, help = 'Path of the folder that contain the .txt files',required=True)
-parser.add_argument('-d','--directory', type=arg_directory, help = 'Directory to wich the dataframe will be stored',required=True)
-parser.add_argument('-t','--duration', type = int, help = 'Duration of the spectrogram', required = True)
-parser.add_argument('-s','--SR', type = int, help = 'Sampling Rate of the spectrogram')
-args = parser.parse_args()
-
-annots = args.path_to_data
+def process_annotations(annotations_folder, duration, sr, outdir):
+    today = date.today()
+    out_file = f'YOLO_detection_{today.day}_{today.month}'
 
-today = date.today()
-out_file = str('YOLO_detection'+str('_'+str(today.day)+'_'+str(today.month)))
+    df_list = []
+    names = []  # Add your class names here
 
-outdir = args.directory
+    for file_name in tqdm(os.listdir(annotations_folder)):
+        if file_name.endswith('.txt'):
+            file_path = os.path.join(annotations_folder, file_name)
+            annotation_df = pd.read_csv(file_path, sep=' ', names=['espece', 'x', 'y', 'w', 'h'])
 
-df = pd.concat({f:pd.read_csv(os.path.join(annots, f), sep=' ', names=['espece', 'x', 'y', 'w', 'h'])
-                    for f in tqdm(os.listdir(annots))}, names=['file'])
+            annotation_df['file'] = file_name
+            annotation_df['idx'] = annotation_df['file'].str.split('_').str[-1].str.split('.').str[0]
+            annotation_df['file'] = annotation_df['file'].str.rsplit('.', 1).str[0] + '.wav'
 
-df = df.reset_index(level=[0])
-df = df.reset_index()
-del df['index']
-df['idx'] = df.file.str.split('_').str[-1].str.split('.').str[0]
-df.file = df.file.str.rsplit('.',1).str[0]+'.wav'
+            annotation_df['annot'] = annotation_df['espece'].apply(lambda x: names[int(x)])
 
-DUREE_SPECTRO = args.duration
-SR = args.SR
+            annotation_df['midl'] = (annotation_df['x'] * duration) + annotation_df['idx'].astype(int)
+            annotation_df['freq_center'] = (1 - annotation_df['y']) * (sr / 2)
+            annotation_df['freq_min'] = annotation_df['freq_center'] - (annotation_df['h'] * (sr / 2)) / 2
+            annotation_df['freq_max'] = annotation_df['freq_center'] + (annotation_df['h'] * (sr / 2)) / 2
+            annotation_df['start'] = annotation_df['midl'] - (annotation_df['w'] * duration) / 2
+            annotation_df['stop'] = annotation_df['midl'] + (annotation_df['w'] * duration) / 2
+            annotation_df['duration'] = annotation_df['stop'] - annotation_df['start']
 
-#put the classes here
-names = []
+            df_list.append(annotation_df)
 
-df['annot'] = 'None'
-for j in range (len(df)):
-    df.loc[j,('annot')] = names[int(df.espece.iloc[j])]
+    result_df = pd.concat(df_list, ignore_index=True)
+    out_path = os.path.join(outdir, f'{out_file}.csv')
+    result_df.to_csv(out_path, index=False)
+    print(f'Saved as {out_path}')
 
-print('Calculating the positions','\n')
-df['midl'] = (df.x*DUREE_SPECTRO)+(df.idx.astype(int))
-df['freq_center'] = (1-df.y)*(SR/2)
-df['freq_min'] = df.freq_center - (df.h*(SR/2))/2
-df['freq_max'] = df.freq_center + (df.h*(SR/2))/2
-df['start'] = df.midl - (df.w * DUREE_SPECTRO)/2
-df['stop'] = df.midl + (df.w * DUREE_SPECTRO)/2
-df['duration'] = df.stop - df.start
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='TODO')
+    parser.add_argument('-p', '--path_to_data', type=arg_directory, help='Path of the folder that contains the .txt files', required=True)
+    parser.add_argument('-d', '--directory', type=arg_directory, help='Directory where the dataframe will be stored', required=True)
+    parser.add_argument('-t', '--duration', type=int, help='Duration of the spectrogram', required=True)
+    parser.add_argument('-s', '--SR', type=int, help='Sampling rate of the spectrogram', required=True)
+    args = parser.parse_args()
 
-df.to_csv(os.path.join(outdir,str(out_file+'.csv')), index= False)
-print('saved as ',os.path.join(outdir,str(out_file+'.csv')))
+    process_annotations(args.path_to_data, args.duration, args.SR, args.directory)
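For readers unfamiliar with the YOLO box encoding that `process_annotations()` inverts here: `x` is the box centre as a fraction of the window duration, `y` the centre as a fraction of the frequency axis measured from the top of the spectrogram, `w` and `h` the normalized extents, and `idx` (parsed from the file name) the chunk's offset in seconds. A quick sanity check with made-up numbers (8 s window, 48 kHz sampling rate):

```python
# Hypothetical detection: centre at 25% of an 8 s window starting at t=16 s,
# vertical centre 30% from the top, covering half the width and a quarter of the height.
x, y, w, h, idx = 0.25, 0.30, 0.50, 0.25, 16
duration, sr = 8, 48000

midl = x * duration + idx                  # 18.0 s
freq_center = (1 - y) * (sr / 2)           # 16800.0 Hz
freq_min = freq_center - h * (sr / 2) / 2  # 13800.0 Hz
freq_max = freq_center + h * (sr / 2) / 2  # 19800.0 Hz
start = midl - w * duration / 2            # 16.0 s
stop = midl + w * duration / 2             # 20.0 s
```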
diff --git a/get_train_annot_YOLO.py b/get_train_annot_YOLO.py
index 19d4b1c2fac73c020f8df0173a11cdc75d728793..55d63ee599e4f478c6aa562615f60d6958bd0a84 100644
--- a/get_train_annot_YOLO.py
+++ b/get_train_annot_YOLO.py
@@ -3,223 +3,167 @@
 import pandas as pd
 import librosa
 import numpy as np
 import matplotlib.pyplot as plt
-from p_tqdm import p_map
-import ipdb
 import random
 from datetime import date
 import argparse
+from p_tqdm import p_map
 import cv2
-import matplotlib.patches as patches
-from matplotlib.patches import Rectangle
-from random import randrange
-from PIL import Image
-from mycolorpy import colorlist as mcp
-
-today = date.today()
 
 def arg_directory(path):
     if os.path.isdir(path):
         return path
     else:
         raise argparse.ArgumentTypeError(f'`{path}` is not a valid path')
 
-parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='TODO')
-parser.add_argument('-f','--filename_path', type= str, help = 'Path and name of the file containing the annotations',required=True)
-parser.add_argument('-p','--path_to_data', type=arg_directory, help = 'Path of the folder that contain the recordings',required=True)
-parser.add_argument('-d','--directory', type=arg_directory, help = 'Directory to wich spectrograms and .txt files will be stored',required=True)
-parser.add_argument('-m','--mode',type=str,choices=['uniform','personalized'],help = 'Choose the mode to calculate the y and height value',required=True)
-parser.add_argument('-u','--unique',type=str, choices=['unique','multiple'], help = 'unique for only one spectrogram per file, multple for multiple spectrogram',required=True)
-parser.add_argument('-c','--columns_name',type=str,help = 'Name of the column that contain the path',required=True)
-parser.add_argument('--export',type=str, default=None, help='To export the position of the bounding box on the spectrogram',required=False)
-args = parser.parse_args()
-
-directory = args.directory
-
-DURATION = 8
-NB_CLASS = 5
-
-df = pd.read_csv(args.filename_path, low_memory=False)
-
-df.rename(columns={'label':'Code'},inplace=True)
-df.rename(columns={'annotation_initial_time':'start'},inplace=True)
-df.rename(columns={'annotation_final_time':'stop'},inplace=True)
-df.rename(columns={'duree':'d_annot'},inplace=True)
-df.rename(columns={'min_frequency':'min_freq'},inplace=True)
-df.rename(columns={'max_frequency':'max_freq'},inplace=True)
-df.rename(columns={'avg_frequency':'midl_y'},inplace=True)
-
-tab = df.groupby('Code').count()
-tab = tab.sort_values(tab.columns[0],ascending = False)[:NB_CLASS]
-df = df[df.Code.isin(tab.index)]
-
-try :
-    df['max_freq'].fillna(9000,inplace = True)
-    df['min_freq'].fillna(1000,inplace = True)
-except Exception:
-    df['max_freq'] = 9000
-    df['min_freq'] = 1000
-    df['midl_y'] = 5000
-
-df['d_annot'] = df.stop - df.start
-df['midl'] = (df.stop + df.start)/2
-df['Path'] = df[args.columns_name]
-
-df = df[df.d_annot<8]
-df = df.reset_index()
-
-list_espece = df.groupby('Code').count().sort_values(df.columns[0],ascending = False)
-data = pd.DataFrame(columns = ['espece','ind'])
-
-for i in range (len(list_espece)):
-    esp = list_espece.index[i]
-    new_col = pd.DataFrame([[esp,i]],columns = ['espece','ind'])
-    data = pd.concat([data,new_col])
-
-liste_espece = data.espece
-liste_espece.to_csv(str(directory+'liste_especes.csv'),index = False)
-
-print('\n',data)
-
-#color = mcp.gen_color(cmap = "Wistia", n= len(list_espece))
-
-colors = pd.DataFrame(columns = ['color', 'species'])
-
-for i in range (30):
-    r = randrange(255)
-    g = randrange(255)
-    b = randrange(255)
-    rand_color = (r, g, b)
-    new = pd.DataFrame([[rand_color, i]], columns = ['color', 'species'])
-    colors = pd.concat([colors, new])
-
-def process(x):
-    count, (f, grp) = x
-    filename = str(f)
-    duration = DURATION
-
-    while len(grp) != 0:
-
-        tab = grp[grp.midl <= grp.start.iloc[0]+7]
-        fin = pd.DataFrame(columns = ['id','x', 'y', 'width', 'height'])
-        duree = tab.d_annot
-
-        if len(tab)==0:
-            tab = grp
-            print(tab)
-        rd = round(random.uniform(-1.5,1.5),2)
-
-        if args.unique == 'multiple':
-            if tab.start.iloc[0] <= 3:
-                offset = 0
-            if tab.start.iloc[0] == 3:
-                offset = 1
-            if tab.start.iloc[0] >= 3:
-                offset = tab.midl.iloc[0] - 3.5 + rd
-        elif args.unique == 'unique':
-            offset= 0
-        window_size = 1024
-        window = np.hanning(window_size)
-
-        y, sr = librosa.load(filename, offset = offset, duration = duration, sr = None)
-        stft = librosa.core.spectrum.stft(y, n_fft=window_size, hop_length=512, window=window)
-        spectrum, freq, time, im = plt.specgram(y, Fs=sr, NFFT=window_size, noverlap=512, cmap='jet')
-
-        vmin = np.flipud(np.log10(np.abs(stft))).mean()
-        vmax = np.flipud(np.log10(np.abs(stft))).max()
-
-        plt.close()
-        plt.imshow(np.flipud(np.log10(np.abs(stft))),aspect = "auto", interpolation = None, cmap = 'jet', vmin = vmin, vmax = vmax)
-        plt.subplots_adjust(top=1, bottom=0, left=0, right=1)
-
-        for idxs, row in tab.iterrows():
-            species = row.Code
-            x_pxl = (row.midl - offset) / duration
-            width_pxl = (row.stop-row.start)/duration
-
-            if args.mode == 'uniform':
-                height_pxl = 0.8
-                y_pxl = 0.5
-
-            else:
-                y_pxl = 1 - (row.midl_y / (sr/2))
-
-                height_pxl = (row.max_freq - row.min_freq)/(sr/2)
-                if height_pxl > 1:
-                    height_pxl = 1
-                elif height_pxl > y_pxl*2:
-                    y_pxl=y_pxl+0.5*(height_pxl-y_pxl*2)
-
-            annotation = pd.DataFrame([[str(data.loc[data.espece == row.Code,'ind'][0]),x_pxl,y_pxl,width_pxl,height_pxl]],columns = ['id','x', 'y', 'width', 'height'])
-
-            fin = pd.concat([fin,annotation])
-        grp = grp.drop(tab.index)
-
-        name = str(row.Path.replace('/','_').replace('.','_')+'_'+str(count))
-        name_file = os.path.join(directory,str('labels_'+str(today.day)+'_'+str(today.month)),str(name+'.txt'))
-
-        try :
-            plt.savefig(os.path.join(directory,str('images_'+str(today.day)+'_'+str(today.month)),row.Code,str(name+'.jpg')))
-            fin.to_csv(name_file,sep =' ',header=False,index=False)
-            plt.savefig(os.path.join(directory,str('images_'+str(today.day)+'_'+str(today.month)),'all',str(name+'.jpg')))
-
-        except :
-            os.mkdir(os.path.join(directory,str('images_'+str(today.day)+'_'+str(today.month))))
-            for especes in list_espece.index:
-                os.mkdir(os.path.join(directory,str('images_'+str(today.day)+'_'+str(today.month)),especes))
-            os.mkdir(os.path.join(directory,str('images_'+str(today.day)+'_'+str(today.month)),'all'))
-            os.mkdir(os.path.join(directory,str('labels_'+str(today.day)+'_'+str(today.month))))
-
-            fin.to_csv(name_file,sep =' ',header=False,index=False)
-            plt.savefig(os.path.join(directory,str('images_'+str(today.day)+'_'+str(today.month)),row.Code,str(name+'.jpg')))
-            plt.savefig(os.path.join(directory,str('images_'+str(today.day)+'_'+str(today.month)),'all',str(name+'.jpg')))
-
-        plt.close()
-
-        if args.export != None:
-
-            im = cv2.imread(os.path.join(directory,str('images_'+str(today.day)+'_'+str(today.month)),'all',str(name+'.jpg')))
-            im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
-            H,W = im.shape[0], im.shape[1]
-            for l in range(len(fin)):
-
-                x, y, w, h = fin.x.iloc[l]*W , fin.y.iloc[l]*H , fin.width.iloc[l]*W , fin.height.iloc[l]*H
-
-                shape1 = (int(x-(0.5*w)), int(y+(0.5*h)))
-                shape2 = (int(x+(0.5*w)), int(y+(0.5*h)))
-                shape3 = (int(x-(0.5*w)), int(y-(0.5*h)))
-                shape4 = (int(x+(0.5*w)), int(y-(0.5*h)))
-
-                #rectangle text shape
-                shp1 = shape4[0]-10, shape4[1]+20
-                shp2 = shape4[0], shape4[1]+20
-                shp3 = shape4[0]-10, shape4[1]
-                shp4 = shape4[0], shape4[1]
-
-                #text placement
-                text_shape = shp1[0], shp1[1]-5
-
-                label = str(fin.id.iloc[l])
-                cv2.rectangle(im, pt1=shape1, pt2=shape4, color= colors[colors.species == label].color, thickness=1)
-                cv2.rectangle(im, pt1=shp1 , pt2= shp4, color= colors[colors.species == label].color, thickness= -1)
-                cv2.putText(im, label, text_shape, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255), 1)
-
-            plt.imshow(im)
-            plt.subplots_adjust(top=1, bottom=0, left=0, right=1)
-            try:
-                plt.savefig(os.path.join(directory, str('images_annotes_'+str(today.day)+'_'+str(today.month)),str(name+'.jpg')))
-
-            except Exception:
-                os.mkdir(os.path.join(directory,str('images_annotes_'+str(today.day)+'_'+str(today.month))))
-                plt.savefig(os.path.join(directory, str('images_annotes_'+str(today.day)+'_'+str(today.month)),str(name+'.jpg')))
-
-            plt.close()
-
-p_map(process, enumerate(df.groupby('Path')), num_cpus=2, total = len(df.groupby('Path')))
-
-print('saved to ',directory)
+def process_annotations(file_path, directory, duration, mode, unique, columns_name, export):
+    today = date.today()
+
+    df = pd.read_csv(file_path, low_memory=False)
+    df.rename(columns={'label': 'Code', 'annotation_initial_time': 'start', 'annotation_final_time': 'stop',
+                       'duree': 'd_annot', 'min_frequency': 'min_freq', 'max_frequency': 'max_freq',
+                       'avg_frequency': 'midl_y'}, inplace=True)
+
+    # Create the frequency columns when missing, fill the gaps otherwise
+    for col, default in (('max_freq', 9000), ('min_freq', 1000), ('midl_y', 5000)):
+        if col in df.columns:
+            df[col].fillna(default, inplace=True)
+        else:
+            df[col] = default
+
+    NB_CLASS = 5
+    tab = df.groupby('Code').count()
+    tab = tab.sort_values(tab.columns[0], ascending=False)[:NB_CLASS]
+    df = df[df.Code.isin(tab.index)]
+    df['d_annot'] = df.stop - df.start
+    df['midl'] = (df.stop + df.start) / 2
+    df['Path'] = df[columns_name]
+
+    df = df[df.d_annot < 8]
+    df = df.reset_index()
+
+    # Map each species code to a class index and save the list
+    list_espece = df.groupby('Code').count().sort_values(df.columns[0], ascending=False)
+    data = pd.DataFrame({'espece': list_espece.index, 'ind': range(len(list_espece))})
+    data.espece.to_csv(os.path.join(directory, 'liste_especes.csv'), index=False)
+
+    # One random colour per class index, used when exporting annotated images
+    colors = [(random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)) for _ in range(30)]
+
+    img_dir = os.path.join(directory, f'images_{today.day}_{today.month}')
+    lbl_dir = os.path.join(directory, f'labels_{today.day}_{today.month}')
+    annot_dir = os.path.join(directory, f'images_annotes_{today.day}_{today.month}')
+
+    def process(x):
+        count, (f, grp) = x
+        filename = str(f)
+
+        while len(grp) != 0:
+            tab = grp[grp.midl <= grp.start.iloc[0] + 7]
+            fin = pd.DataFrame(columns=['id', 'x', 'y', 'width', 'height'])
+
+            if len(tab) == 0:
+                tab = grp
+
+            rd = round(random.uniform(-1.5, 1.5), 2)
+
+            if unique == 'multiple' and tab.start.iloc[0] > 3:
+                offset = tab.midl.iloc[0] - 3.5 + rd
+            else:
+                offset = 0
+
+            window_size = 1024
+            window = np.hanning(window_size)
+            y, sr = librosa.load(filename, offset=offset, duration=duration, sr=None)
+            stft = librosa.core.spectrum.stft(y, n_fft=window_size, hop_length=512, window=window)
+
+            log_stft = np.flipud(np.log10(np.abs(stft)))
+            vmin, vmax = log_stft.mean(), log_stft.max()
+
+            plt.close()
+            plt.imshow(log_stft, aspect='auto', interpolation=None, cmap='jet', vmin=vmin, vmax=vmax)
+            plt.subplots_adjust(top=1, bottom=0, left=0, right=1)
+
+            for idxs, row in tab.iterrows():
+                x_pxl = (row.midl - offset) / duration
+                width_pxl = (row.stop - row.start) / duration
+
+                if mode == 'uniform':
+                    height_pxl = 0.8
+                    y_pxl = 0.5
+                else:
+                    y_pxl = 1 - (row.midl_y / (sr / 2))
+                    height_pxl = (row.max_freq - row.min_freq) / (sr / 2)
+                    if height_pxl > 1:
+                        height_pxl = 1
+                    elif height_pxl > y_pxl * 2:
+                        y_pxl = y_pxl + 0.5 * (height_pxl - y_pxl * 2)
+
+                class_id = data.loc[data.espece == row.Code, 'ind'].iloc[0]
+                annotation = pd.DataFrame([[str(class_id), x_pxl, y_pxl, width_pxl, height_pxl]],
+                                          columns=['id', 'x', 'y', 'width', 'height'])
+                fin = pd.concat([fin, annotation])
+
+            grp = grp.drop(tab.index)
+
+            name = str(row.Path.replace('/', '_').replace('.', '_') + '_' + str(count))
+            os.makedirs(os.path.join(img_dir, row.Code), exist_ok=True)
+            os.makedirs(os.path.join(img_dir, 'all'), exist_ok=True)
+            os.makedirs(lbl_dir, exist_ok=True)
+
+            fin.to_csv(os.path.join(lbl_dir, name + '.txt'), sep=' ', header=False, index=False)
+            plt.savefig(os.path.join(img_dir, row.Code, name + '.jpg'))
+            plt.savefig(os.path.join(img_dir, 'all', name + '.jpg'))
+            plt.close()
+
+            if export:
+                im = cv2.imread(os.path.join(img_dir, 'all', name + '.jpg'))
+                im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
+                H, W = im.shape[0], im.shape[1]
+                for l in range(len(fin)):
+                    x, y_c, w, h = fin.x.iloc[l] * W, fin.y.iloc[l] * H, fin.width.iloc[l] * W, fin.height.iloc[l] * H
+                    bottom_left = (int(x - 0.5 * w), int(y_c + 0.5 * h))
+                    top_right = (int(x + 0.5 * w), int(y_c - 0.5 * h))
+                    label = str(fin.id.iloc[l])
+                    box_color = colors[int(label) % len(colors)]
+                    # Bounding box, filled label background, then the class id as text
+                    cv2.rectangle(im, pt1=bottom_left, pt2=top_right, color=box_color, thickness=1)
+                    cv2.rectangle(im, pt1=(top_right[0] - 10, top_right[1] + 20), pt2=top_right, color=box_color, thickness=-1)
+                    cv2.putText(im, label, (top_right[0] - 10, top_right[1] + 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
+
+                plt.imshow(im)
+                plt.subplots_adjust(top=1, bottom=0, left=0, right=1)
+                os.makedirs(annot_dir, exist_ok=True)
+                plt.savefig(os.path.join(annot_dir, name + '.jpg'))
+                plt.close()
+
+    p_map(process, enumerate(df.groupby('Path')), num_cpus=2, total=len(df.groupby('Path')))
+    print('saved to', directory)
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='TODO')
+    parser.add_argument('-f', '--filename_path', type=str, help='Path and name of the file containing the annotations', required=True)
+    parser.add_argument('-p', '--path_to_data', type=arg_directory, help='Path of the folder that contains the recordings', required=True)
+    parser.add_argument('-d', '--directory', type=arg_directory, help='Directory to which spectrograms and .txt files will be stored', required=True)
+    parser.add_argument('-m', '--mode', type=str, choices=['uniform', 'personalized'], help='Choose the mode to calculate the y and height values', required=True)
+    parser.add_argument('-u', '--unique', type=str, choices=['unique', 'multiple'], help='unique for only one spectrogram per file, multiple for multiple spectrograms', required=True)
+    parser.add_argument('-c', '--columns_name', type=str, help='Name of the column that contains the path', required=True)
+    parser.add_argument('--export', type=str, default=None, help='Export the bounding boxes drawn on the spectrograms', required=False)
+    args = parser.parse_args()
+
+    process_annotations(args.filename_path, args.directory, duration=8, mode=args.mode, unique=args.unique, columns_name=args.columns_name, export=args.export)
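The forward mapping used by `process()` above is the mirror image of the one in get_time_freq_detection.py: time extents are normalized by the window duration, frequency extents by the Nyquist frequency, and the vertical axis is flipped. A worked example with made-up values ('personalized' mode, 8 s window at offset 0, 44.1 kHz):

```python
# Hypothetical annotation: 2.0-3.0 s, 4000-6000 Hz, average frequency 5000 Hz
start, stop, min_freq, max_freq, midl_y = 2.0, 3.0, 4000, 6000, 5000
duration, offset, sr = 8, 0.0, 44100

midl = (start + stop) / 2                      # 2.5 s
x_pxl = (midl - offset) / duration             # 0.3125
width_pxl = (stop - start) / duration          # 0.125
y_pxl = 1 - midl_y / (sr / 2)                  # ~0.7732 (frequency axis is flipped)
height_pxl = (max_freq - min_freq) / (sr / 2)  # ~0.0907
```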
diff --git a/get_train_val_YOLO.py b/get_train_val_YOLO.py
index c0c8ec00a3e5251fd310cadd37f654db1241ad96..7539eed7cc23f58ff36adfcb08fdebd57f55e671 100644
--- a/get_train_val_YOLO.py
+++ b/get_train_val_YOLO.py
@@ -5,7 +5,5 @@
 from tqdm import tqdm
-import numpy as np
 import shutil
 import argparse
-import ipdb
 
 def arg_directory(path):
     if os.path.isdir(path):
@@ -13,83 +12,87 @@ def arg_directory(path):
     else:
         raise argparse.ArgumentTypeError(f'`{path}` is not a valid path')
 
-parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='TODO')
-parser.add_argument('-r','--ratio', type=float, default = 0.7,help = 'Train Ratio (val = 1 - ratio)')
-parser.add_argument('-p','--path_to_data', type=arg_directory, help = 'Path of the folder that contain the .txt (ending with labels/)',required=True)
-parser.add_argument('-d','--direction', type=arg_directory, help = 'Directory to wich spectrogram and .txt files will be stored (different from -p)',required=True)
-args = parser.parse_args()
-
-path = args.path_to_data
-direction = args.direction
-
-NB_CLASS = 2
-
-df = pd.concat({f:pd.read_csv(os.path.join(path, f), sep=' ', names=['espece', 'x', 'y', 'w', 'h'])
-                    for f in tqdm(os.listdir(path))}, names=['file'])
-
-df = df.reset_index(level=[0])
-df = df.reset_index()
-del df['index']
-
-df = df[df.espece!='y']
-df.espece = df.espece.astype(float)
-tab = df.groupby('espece').count()
-tab = tab.sort_values(tab.columns[0],ascending = False)[:NB_CLASS]
-compte = pd.DataFrame(np.zeros((len(tab)+1,1)),columns = ['nombre'])
-
-nb_val = int(tab.file.min()*(1-args.ratio))
-esp_min = tab[tab.file == tab.file.min()].index[0]
-
-val = df[df.file.isin(df[df.espece == esp_min].sample(int(nb_val)).file)]
-df.drop(val.index,inplace=True)
-
-for n in tab.iterrows():
-    if n[0] == esp_min:
-        continue
-    else:
-        nb_val = len(df[df.espece == n[0]])*(1-args.ratio)
-        new_row = df[df.file.isin(df[df.espece == n[0]].sample(int(nb_val)).file)]
-        val = pd.concat((val, new_row))
-        df = df.drop(new_row.index)
-
-compte = val.groupby('espece').count()
-
-val = pd.DataFrame(val.groupby('file').count().index,columns = ['file'])
-train = pd.DataFrame(df.groupby('file').count().index,columns = ['file'])
-
-val.file = val.file.str.rsplit('.',1).str[0]
-train.file = train.file.str.rsplit('.',1).str[0]
-
-isExist = os.path.exists(os.path.join(direction,'images'))
-
-if not isExist:
-    os.mkdir(os.path.join(direction,'images'))
-    os.mkdir(os.path.join(direction,'images/train'))
-    os.mkdir(os.path.join(direction,'images/val'))
-    os.mkdir(os.path.join(direction,'labels'))
-    os.mkdir(os.path.join(direction,'labels/train'))
-    os.mkdir(os.path.join(direction,'labels/val'))
-
-for i,row in tqdm(val.iterrows(), total=val.shape[0]):
-
-    shutil.copy2(os.path.join(path, str(row.file+'.txt')), os.path.join(direction, str('labels/val/'+row.file+'.txt')))
-    shutil.copy2(os.path.join(path, str('../images/all/'+row.file+'.jpg')), os.path.join(direction, str('images/val/'+row.file+'.jpg')))
-
-for i,row in tqdm(train.iterrows(), total=train.shape[0]):
-
-    shutil.copy2(os.path.join(path, str(row.file+'.txt')), os.path.join(direction, str('labels/train/'+row.file+'.txt')))
-    shutil.copy2(os.path.join(path, str('../images/all/'+row.file+'.jpg')), os.path.join(direction, str('images/train/'+row.file+'.jpg')))
-
-try:
-    liste_espece = pd.read_csv(str(path+'../liste_especes.csv'))
-except Exception:
-    print('No species list detected, please add to ',str(direction+'custom_data.yaml'))
-
-with open(str(direction+'custom_data.yaml'),'w') as f:
-    f.write('train : '+direction+'images/train\n')
-    f.write('val : '+direction+'images/val\n')
-    f.write('nc : ' + str(len(liste_espece))+'\n')
-    try:
-        f.write('names : ' + str(liste_espece.espece.tolist()))
-    except Exception:
-        print('.yaml saved to ',str(direction+'custom_data.yaml'))
\ No newline at end of file
+def create_directory_if_not_exists(directory):
+    os.makedirs(directory, exist_ok=True)
+
+def copy_files_to_directory(file_list, source_dir, destination_dir, extension):
+    for file_name in file_list:
+        source_path = os.path.join(source_dir, f'{file_name}{extension}')
+        destination_path = os.path.join(destination_dir, f'{file_name}{extension}')
+        shutil.copy2(source_path, destination_path)
+
+def process_data(args):
+    path = args.path_to_data
+    direction = args.direction
+
+    NB_CLASS = 2
+
+    df = pd.concat({f: pd.read_csv(os.path.join(path, f), sep=' ', names=['espece', 'x', 'y', 'w', 'h'])
+                    for f in tqdm(os.listdir(path))}, names=['file'])
+
+    df = df.reset_index(level=[0])
+    df = df.reset_index(drop=True)
+
+    df = df[df.espece != 'y']
+    df.espece = df.espece.astype(float)
+    tab = df.groupby('espece').count()
+    tab = tab.sort_values(tab.columns[0], ascending=False)[:NB_CLASS]
+
+    # Balanced split: sample the validation files of the rarest class first
+    nb_val = int(tab.file.min() * (1 - args.ratio))
+    esp_min = tab[tab.file == tab.file.min()].index[0]
+
+    val = df[df.file.isin(df[df.espece == esp_min].sample(int(nb_val)).file)]
+    df.drop(val.index, inplace=True)
+
+    for n in tab.iterrows():
+        if n[0] == esp_min:
+            continue
+        nb_val = len(df[df.espece == n[0]]) * (1 - args.ratio)
+        new_row = df[df.file.isin(df[df.espece == n[0]].sample(int(nb_val)).file)]
+        val = pd.concat((val, new_row))
+        df = df.drop(new_row.index)
+
+    val = pd.DataFrame(val.groupby('file').count().index, columns=['file'])
+    train = pd.DataFrame(df.groupby('file').count().index, columns=['file'])
+
+    val.file = val.file.str.rsplit('.', 1).str[0]
+    train.file = train.file.str.rsplit('.', 1).str[0]
+
+    create_directory_if_not_exists(os.path.join(direction, 'images/train'))
+    create_directory_if_not_exists(os.path.join(direction, 'images/val'))
+    create_directory_if_not_exists(os.path.join(direction, 'labels/train'))
+    create_directory_if_not_exists(os.path.join(direction, 'labels/val'))
+
+    copy_files_to_directory(val.file, path, os.path.join(direction, 'labels/val'), '.txt')
+    copy_files_to_directory(val.file, os.path.join(path, '../images/all'), os.path.join(direction, 'images/val'), '.jpg')
+
+    copy_files_to_directory(train.file, path, os.path.join(direction, 'labels/train'), '.txt')
+    copy_files_to_directory(train.file, os.path.join(path, '../images/all'), os.path.join(direction, 'images/train'), '.jpg')
+
+    try:
+        liste_espece = pd.read_csv(os.path.join(path, '../liste_especes.csv'))
+    except Exception:
+        liste_espece = None
+        print('No species list detected, please add nc and names to', os.path.join(direction, 'custom_data.yaml'))
+
+    with open(os.path.join(direction, 'custom_data.yaml'), 'w') as f:
+        f.write(f'train: {os.path.join(direction, "images/train")}\n')
+        f.write(f'val: {os.path.join(direction, "images/val")}\n')
+        if liste_espece is not None:
+            f.write(f'nc: {len(liste_espece)}\n')
+            f.write(f'names: {liste_espece.espece.tolist()}')
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='TODO')
+    parser.add_argument('-r', '--ratio', type=float, default=0.7, help='Train ratio (val = 1 - ratio)')
+    parser.add_argument('-p', '--path_to_data', type=arg_directory, help='Path of the folder that contains the .txt files (ending with labels/)', required=True)
+    parser.add_argument('-d', '--direction', type=arg_directory, help='Directory to which spectrograms and .txt files will be stored (different from -p)', required=True)
+    args = parser.parse_args()
+
+    process_data(args)
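For reference, with the default ratio and a two-class species list, the generated `custom_data.yaml` would look roughly like this (paths and class names are hypothetical):

```yaml
train: /data/dataset/images/train
val: /data/dataset/images/val
nc: 2
names: ['species_a', 'species_b']
```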
diff --git a/get_yolo_detection.py b/get_yolo_detection.py
index 180da13fe0418c3f295dbd7e81684bfe462cc6c9..6146e95c6e806b82298821ba21fd1ad848bf46f5 100644
--- a/get_yolo_detection.py
+++ b/get_yolo_detection.py
@@ -1,6 +1,5 @@
 import pandas as pd
 import os
-import ipdb
 from tqdm import tqdm
 import argparse
 from datetime import date
@@ -11,36 +10,39 @@ def arg_directory(path):
     else:
         raise argparse.ArgumentTypeError(f'`{path}` is not a valid path')
 
-parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='TODO')
-parser.add_argument('-p','--path_to_data', type=arg_directory, help = 'Path of the folder that contain the .txt files',required=True)
-parser.add_argument('-d','--direction', type=arg_directory, help = 'Directory to wich the dataframe will be stored',required=True)
-args = parser.parse_args()
+def process_data(args):
+    annots = args.path_to_data
+    today = date.today()
+    out_file = f'YOLO_detection_{today.day}_{today.month}'
+    outdir = args.direction
 
-annots = args.path_to_data
+    # Load all detection .txt files into a single dataframe
+    df = pd.concat({f: pd.read_csv(os.path.join(annots, f), sep=' ', names=['espece', 'x', 'y', 'w', 'h', 'conf'])
+                    for f in tqdm(os.listdir(annots))}, names=['file'])
 
-today = date.today()
-out_file = str('YOLO_detection_'+str('_'+str(today.day)+'_'+str(today.month)))
+    df = df.reset_index(level=[0])
+    df = df.reset_index(drop=True)
+    df['idx'] = df.file.str.split('_').str[-1].str.split('.').str[0]
+    df.file = df.file.str.rsplit('.', 1).str[0] + '.wav'
 
-outdir = args.direction
+    # Define the class names here, in the same order as the trained model's ids
+    names = []
 
-df = pd.concat({f:pd.read_csv(os.path.join(annots, f), sep=' ', names=['espece', 'x', 'y', 'w', 'h','conf'])
-                    for f in tqdm(os.listdir(annots))}, names=['file'])
+    df['annot'] = df['espece'].apply(lambda x: names[int(x)])
 
-df = df.reset_index(level=[0])
-df = df.reset_index()
-del df['index']
-df['idx'] = df.file.str.split('_').str[-1].str.split('.').str[0]
-df.file = df.file.str.rsplit('.',1).str[0]+'.wav'
+    # Save the processed dataframe to a CSV file
+    out_path = os.path.join(outdir, f'{out_file}.csv')
+    df.to_csv(out_path, index=False)
+    print(f'Saved as {out_path}')
 
-DUREE_SPECTRO = 8
-OVERLAP = 2
-
-#put the classes here
-names = []
-
-df['annot'] = 'None'
-for j in range (len(df)):
-    df['annot'].iloc[j] = names[int(df.espece.iloc[j])]
-
-df.to_csv(os.path.join(outdir,str(out_file+'.csv')), index= False)
-print('saved as ',os.path.join(outdir,str(out_file+'.csv')))
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='TODO')
+    parser.add_argument('-p', '--path_to_data', type=arg_directory, help='Path of the folder that contains the .txt files', required=True)
+    parser.add_argument('-d', '--direction', type=arg_directory, help='Directory to which the dataframe will be stored', required=True)
+    args = parser.parse_args()
+
+    process_data(args)
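`names` is intentionally left empty above; it must be filled with the class labels in the same index order the model was trained with (presumably the order of the `liste_especes.csv` written by get_train_annot_YOLO.py). A toy illustration of the mapping, with hypothetical labels:

```python
import pandas as pd

# Hypothetical class list; the index must match the trained model's class ids
names = ['petrel', 'shearwater', 'noise']

df = pd.DataFrame({'espece': [0, 2, 1]})
df['annot'] = df['espece'].apply(lambda x: names[int(x)])
print(df.annot.tolist())  # ['petrel', 'noise', 'shearwater']
```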
diff --git a/labelme2yolo.py b/labelme2yolo.py
index 6c1a7c0955ee4a659331a45d217bf81beed4719a..5ba5fa268fa120bd78be874bc80e5b2261a3b5d7 100644
--- a/labelme2yolo.py
+++ b/labelme2yolo.py
@@ -1,11 +1,9 @@
 import os
 import json
-import random
 import base64
-import shutil
 import argparse
-from pathlib import Path
 from glob import glob
+from pathlib import Path
 
 def arg_directory(path):
     if os.path.isdir(path):
@@ -13,55 +12,61 @@ def arg_directory(path):
     else:
         raise argparse.ArgumentTypeError(f'`{path}` is not a valid path')
 
-parser = argparse.ArgumentParser(description='Convert LabelMe annotations to YOLO compatible')
-parser.add_argument('-p','--path_to_data',type=arg_directory,help='Path to LabelMe annotations')
-parser.add_argument('-d','--directory',type=arg_directory,help='Directory to which YOLO annotations will be stored')
-args = parser.parse_args()
+def convert_labelme_to_yolo(labelme_annotation_path, yolo_directory):
+    # Load the LabelMe annotation
+    image_id = Path(labelme_annotation_path).stem
+    with open(labelme_annotation_path, 'r') as labelme_annotation_file:
+        labelme_annotation = json.load(labelme_annotation_file)
 
-# YOLO metadata and files
-yolo_names = []
-yolo_names_path = os.path.join(args.directory, 'custom.names')
+    # YOLO annotation and image paths
+    yolo_annotation_path = os.path.join(yolo_directory, 'labels', f'{image_id}.txt')
+    yolo_image_path = os.path.join(yolo_directory, 'images', f'{image_id}.jpg')
 
-# Convert image annotations
+    with open(yolo_annotation_path, 'w') as yolo_annotation_file:
+        yolo_image_data = base64.b64decode(labelme_annotation['imageData'])
 
-for index, labelme_annotation_path in enumerate(glob(f'{args.path_to_data}/*.json')):
-    image_id = os.path.basename(labelme_annotation_path).rstrip('.json')
+        # Write the YOLO image
+        with open(yolo_image_path, 'wb') as yolo_image_file:
+            yolo_image_file.write(yolo_image_data)
 
-    labelme_annotation_file = open(labelme_annotation_path, 'r')
-    labelme_annotation = json.load(labelme_annotation_file)
+        # Write the YOLO image annotation, normalized to the image size
+        scale_width = 1.0 / labelme_annotation['imageWidth']
+        scale_height = 1.0 / labelme_annotation['imageHeight']
 
-    yolo_annotation_path = os.path.join(args.directory,'labels', image_id + '.txt')
-    yolo_annotation_file = open(yolo_annotation_path, 'w')
-    yolo_image = base64.decodebytes(labelme_annotation['imageData'].encode())
-    yolo_image_path = os.path.join(args.directory, 'labels', image_id + '.jpg')
+        for shape in labelme_annotation['shapes']:
+            if shape['shape_type'] != 'rectangle':
+                print(f'Invalid type `{shape["shape_type"]}` in annotation `{labelme_annotation_path}`')
+                continue
 
-    # Write YOLO image (and it to the list)
-    yolo_image_file = open(yolo_image_path, 'wb')
-    yolo_image_file.write(yolo_image)
-    yolo_image_file.close()
+            label = shape['label']
+            x1, y1 = shape['points'][0]
+            x2, y2 = shape['points'][1]
+            width = abs(x2 - x1) * scale_width
+            height = abs(y2 - y1) * scale_height
+            x_center = ((x1 + x2) / 2) * scale_width
+            y_center = ((y1 + y2) / 2) * scale_height
 
-    # Write YOLO image annotation
-    for shape in labelme_annotation['shapes']:
-        if shape['shape_type'] != 'rectangle':
-            print(
-                f'Invalid type `{shape["shape_type"]}` in annotation `annotation_path`')
-            continue
-        if shape['label'] not in yolo_names:
-            yolo_names.append(shape['label'])
+            annotation_line = f'{label} {x_center} {y_center} {width} {height}\n'
+            yolo_annotation_file.write(annotation_line)
 
-        points = shape['points']
-        scale_width = 1.0 / labelme_annotation['imageWidth']
-        scale_height = 1.0 / labelme_annotation['imageHeight']
-        width = abs(points[1][0] - points[0][0]) * scale_width
-        height = abs(points[1][1] - points[0][1]) * scale_height
-        x = (abs(points[1][0] + points[0][0]) / 2) * scale_width
-        y = (abs(points[1][1] + points[0][1]) / 2) * scale_height
+def main(args):
+    os.makedirs(os.path.join(args.directory, 'labels'), exist_ok=True)
+    os.makedirs(os.path.join(args.directory, 'images'), exist_ok=True)
 
-        object_class = shape['label']
-        yolo_annotation_file.write(f'{object_class} {x} {y} {width} {height}\n')
+    yolo_names = set()
 
-# Write YOLO names
-yolo_names_file = open(yolo_names_path, 'w')
-yolo_names_file.write(os.linesep.join(yolo_names))
-yolo_names_file.close()
+    for labelme_annotation_path in glob(f'{args.path_to_data}/*.json'):
+        convert_labelme_to_yolo(labelme_annotation_path, args.directory)
+
+        with open(labelme_annotation_path, 'r') as labelme_annotation_file:
+            labelme_annotation = json.load(labelme_annotation_file)
+
+        for shape in labelme_annotation['shapes']:
+            yolo_names.add(shape['label'])
+
+    # Write the YOLO names file
+    yolo_names_path = os.path.join(args.directory, 'custom.names')
+    with open(yolo_names_path, 'w') as yolo_names_file:
+        yolo_names_file.write('\n'.join(yolo_names))
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Convert LabelMe annotations to YOLO compatible')
+    parser.add_argument('-p', '--path_to_data', type=arg_directory, help='Path to LabelMe annotations', required=True)
+    parser.add_argument('-d', '--directory', type=arg_directory, help='Directory to which YOLO annotations will be stored', required=True)
+    args = parser.parse_args()
+
+    main(args)
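A worked example of the rectangle conversion above, with a made-up 640x480 image and one LabelMe rectangle from (64, 48) to (192, 144):

```python
img_w, img_h = 640, 480
(x1, y1), (x2, y2) = (64, 48), (192, 144)

x_center = ((x1 + x2) / 2) / img_w   # 0.2
y_center = ((y1 + y2) / 2) / img_h   # 0.2
width = abs(x2 - x1) / img_w         # 0.2
height = abs(y2 - y1) / img_h        # 0.2

print(f'label {x_center} {y_center} {width} {height}')  # label 0.2 0.2 0.2 0.2
```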