Skip to content
Snippets Groups Projects
Commit 2afd11a9 authored by Stephane Chavin's avatar Stephane Chavin
Browse files

improve code

parent bed2c86e
Branches
No related tags found
No related merge requests found
# YOLO-DYNI
Ce dépôt git a été créé pour faciliter la prise en main de YOLOv5.
Il contient notamment un script permettant d'extraire les spectrogrammes de plusieurs enregistrements ([get_spectrogram.py](https://gitlab.lis-lab.fr/stephane.chavin/yolo-dyni/-/blob/main/get_spectrogram.py)), un script nécessaire à la conversion des annotations LabelMe vers YOLO ([labelme2yolo.py](https://gitlab.lis-lab.fr/stephane.chavin/yolo-dyni/-/blob/main/labelme2yolo.py)), un script pour convertir des annotations d'un dataframe vers YOLO ([get_train_annot_YOLO.py](https://gitlab.lis-lab.fr/stephane.chavin/yolo-dyni/-/blob/main/get_train_annot_YOLO.py/)), un script permettant de séparer le train et la validation de manière équilibrée ([get_train_val_YOLO.py](https://gitlab.lis-lab.fr/stephane.chavin/yolo-dyni/-/blob/main/get_train_val_YOLO.py)) et un script qui permet de compiler les détections d'un modèle entraîné dans un dataframe ([get_yolo_detection.py](https://gitlab.lis-lab.fr/stephane.chavin/yolo-dyni/-/blob/main/get_yolo_detection.py)).
......
......@@ -11,41 +11,43 @@ def arg_directory(path):
else:
raise argparse.ArgumentTypeError(f'`{path}` is not a valid path')
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='TODO')
parser.add_argument('-p','--path_to_json', type=arg_directory, help = 'Path of the folder that contain the .json',required=True)
parser.add_argument('-i','--path_to_img', type=arg_directory, help = 'Path of the folder that contain the .jpg',required=True)
parser.add_argument('-d','--directory', type=arg_directory, help = 'Directory to wich modified .json files will be stored',required=True)
args = parser.parse_args()
filename = args.path_to_json
out_file = args.directory
img_path = args.path_to_img
liste_file = os.listdir(filename)
liste_file = pd.DataFrame(liste_file, columns =['fn'])
liste_file['type'] = liste_file.fn.str.split('.').str[-1]
liste_file = liste_file[liste_file.type == 'json']
liste_file.reset_index(inplace = True)
def process_json_files(json_dir, img_dir, output_dir):
json_files = [f for f in os.listdir(json_dir) if f.endswith('.json')]
for i in range (len(liste_file)):
if liste_file.fn[i][0] == '.':
liste_file = liste_file.drop(i)
for json_file in json_files:
if json_file.startswith('.'):
continue
liste_file = liste_file.reset_index()
json_path = os.path.join(json_dir, json_file)
img_path = os.path.join(img_dir, json_file.replace('.json', '.jpg'))
for i, row in liste_file.iterrows():
if len(row.fn) > 30:
data = labelme.LabelFile.load_image_file(os.path.join(img_path,str(row.fn[:-4]+'jpg')))
image_data = base64.b64encode(data).decode('utf-8')
else:
if not os.path.exists(img_path):
continue
try:
len(data)
except TypeError:
with open(img_path, 'rb') as img_file:
image_data = base64.b64encode(img_file.read()).decode('utf-8')
except FileNotFoundError:
continue
f = open(filename+row.fn,)
get_data = json.load(f)
get_data['imageData'] = image_data
get_data['imagePath'] = img_path+row.fn[:-4]+'jpg'
with open(out_file+row.fn, 'w') as f:
json.dump(get_data, f, indent=4)
with open(json_path, 'r') as f:
json_data = json.load(f)
json_data['imageData'] = image_data
json_data['imagePath'] = img_path
output_path = os.path.join(output_dir, json_file)
with open(output_path, 'w') as f:
json.dump(json_data, f, indent=4)
def main():
    """Parse the command-line options and run the JSON rewriting step.

    Three directory options are required, each validated by ``arg_directory``:
    ``-p/--path_to_json`` (input .json files), ``-i/--path_to_img`` (matching
    .jpg images) and ``-d/--directory`` (where the modified .json files are
    written). The parsed values are forwarded to ``process_json_files``.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        # Replaces the former 'TODO' placeholder that was shown in --help.
        description='Add the base64-encoded image data and the image path to each .json annotation file',
    )
    parser.add_argument('-p', '--path_to_json', type=arg_directory, help='Path to the folder containing the .json files', required=True)
    parser.add_argument('-i', '--path_to_img', type=arg_directory, help='Path to the folder containing the .jpg images', required=True)
    parser.add_argument('-d', '--directory', type=arg_directory, help='Directory to which modified .json files will be stored', required=True)
    args = parser.parse_args()
    process_json_files(args.path_to_json, args.path_to_img, args.directory)
if __name__ == "__main__":
main()
import os
import librosa
import ipdb
import glob
import argparse
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from p_tqdm import p_map
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')
......@@ -18,69 +16,65 @@ def arg_directory(path):
else:
raise argparse.ArgumentTypeError(f'`{path}` is not a valid path')
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='TODO')
parser.add_argument('-f','--file', type=str,help = 'Name of the file that contain the recording to print')
parser.add_argument('-p','--path_to_data', type=arg_directory, help = 'Path of the folder that contain the recordings')
parser.add_argument('-d','--directory', type=arg_directory, help = 'Directory to wich spectrogram will be stored')
parser.add_argument('-m','--mode', type=str,choices=['unique','multiple'], help = 'Direction of the saved spectrogram')
parser.add_argument('-n','--columns_name', type=str, help = 'Name of the columns that contain the path of the .wav')
parser.add_argument('-i','--input', type=str, choices=['file','folder'], help = 'Choose "file" if you have a .csv file or "folder" to export spectrogram from all the .wav of a folder')
args = parser.parse_args()
def create_spectrogram(y, directory, filename, offset, duration):
window_size = 1024
window = np.hanning(window_size)
stft = librosa.core.spectrum.stft(y, n_fft=window_size, hop_length=512, window=window)
path_to_data = args.path_to_data
direction = args.directory
folder = 'Spectrogram/'
plt.close()
plt.figure()
#PARAMETERS
DURATION = 8
OVERLAP = 2
log_stft = np.log10(np.abs(stft))
vmin, vmax = log_stft.min(), log_stft.max()
if args.mode == 'multiple':
NB_IMG_PER_REC = 30
elif args.mode == 'unique':
NB_IMG_PER_REC = 1
plt.imshow(log_stft[::-1], aspect="auto", interpolation=None, cmap='jet', vmin=vmin, vmax=vmax)
plt.subplots_adjust(top=1, bottom=0, left=0, right=1)
if args.input == 'file':
df = pd.read_csv(args.file,low_memory=False)
df['Path'] = df[args.columns_name]
elif args.input == 'folder':
df = pd.DataFrame(glob.glob(os.path.join(path_to_data,'*')),columns = ['Path'])
name = os.path.join(directory, 'Spectrogram', f"{filename.replace('/', '_').split('.')[0]}_{offset}")
def process(x):
_, (i) = x
try:
plt.savefig(name + '.jpg')
except FileNotFoundError:
os.makedirs(os.path.join(directory, 'Spectrogram'), exist_ok=True)
plt.savefig(name + '.jpg')
for count, j in enumerate(range (NB_IMG_PER_REC)): #30*8 secondes - 30*2 secondes (overlap) = 180 secondes affichées sur 30 images : n'affiche que les 3 premières minutes d'un enregistrement
def process_recordings(args):
_, (i) = args
duration = 8
overlap = 2
for count in range(args.img_per_rec):
offset = count * (duration - overlap)
filename = str(i[0])
offset = count * (DURATION - OVERLAP)
try:
y, sr = librosa.load(filename, offset = offset, duration = DURATION, sr = None)
y, _ = librosa.load(filename, offset=offset, duration=duration, sr=None)
create_spectrogram(y, args.directory, filename, offset, duration)
except Exception:
print(filename)
continue
window_size = 1024
window = np.hanning(window_size)
stft = librosa.core.spectrum.stft(y, n_fft=window_size, hop_length=512, window=window)
plt.close()
plt.figure()
vmin = np.flipud(np.log10(np.abs(stft))).mean()
vmax = np.flipud(np.log10(np.abs(stft))).max()
plt.imshow(np.flipud(np.log10(np.abs(stft))),aspect = "auto",interpolation = None,cmap = 'jet',vmin = vmin,vmax = vmax)
plt.subplots_adjust(top=1, bottom=0, left=0, right=1)
if __name__ == "__main__":
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='TODO')
parser.add_argument('-f', '--file', type=str, help='Name of the file that contains the recording to print')
parser.add_argument('-p', '--path_to_data', type=arg_directory, help='Path of the folder that contains the recordings', required=True)
parser.add_argument('-d', '--directory', type=arg_directory, help='Directory to which spectrograms will be stored', required=True)
parser.add_argument('-m', '--mode', type=str, choices=['unique', 'multiple'], help='Direction of the saved spectrogram')
parser.add_argument('-n', '--columns_name', type=str, help='Name of the columns that contain the path of the .wav')
parser.add_argument('-i', '--input', type=str, choices=['file', 'folder'], help='Choose "file" if you have a .csv file or "folder" to export spectrogram from all the .wav of a folder')
args = parser.parse_args()
name = str(i[0].replace('/','_').split('.')[0]+'_'+str(offset)) #count
try :
plt.savefig(os.path.join(direction,folder, str(name+'.jpg')))
if args.mode == 'multiple':
img_per_rec = 30
elif args.mode == 'unique':
img_per_rec = 1
except FileNotFoundError:
print('creating the directory : ',os.path.join(direction,folder))
os.mkdir(os.path.join(direction,folder))
plt.savefig(os.path.join(direction,folder, str(name+'.jpg')))
path_to_data = args.path_to_data
if args.input == 'file':
df = pd.read_csv(args.file, low_memory=False)
df['Path'] = df[args.columns_name]
elif args.input == 'folder':
df = pd.DataFrame(glob.glob(os.path.join(path_to_data, '*'), recursive=True), columns=['Path'])
p_map(process, enumerate(df.groupby('Path')), num_cpus=1, total = len(df.groupby('Path')))
print('saved to ',str(directory+folder))
p_map(process_recordings, enumerate(df.groupby('Path'), img_per_rec=img_per_rec), num_cpus=1, total=len(df.groupby('Path')))
print(f'Saved to {args.directory}/Spectrogram')
import pandas as pd
import os
import ipdb
from tqdm import tqdm
import argparse
from datetime import date
from tqdm import tqdm
def arg_directory(path):
if os.path.isdir(path):
......@@ -11,47 +10,44 @@ def arg_directory(path):
else:
raise argparse.ArgumentTypeError(f'`{path}` is not a valid path')
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='TODO')
parser.add_argument('-p','--path_to_data', type=arg_directory, help = 'Path of the folder that contain the .txt files',required=True)
parser.add_argument('-d','--directory', type=arg_directory, help = 'Directory to wich the dataframe will be stored',required=True)
parser.add_argument('-t','--duration', type = int, help = 'Duration of the spectrogram', required = True)
parser.add_argument('-s','--SR', type = int, help = 'Sampling Rate of the spectrogram')
args = parser.parse_args()
annots = args.path_to_data
def process_annotations(annotations_folder, duration, sr):
today = date.today()
out_file = str('YOLO_detection'+str('_'+str(today.day)+'_'+str(today.month)))
out_file = f'YOLO_detection_{today.day}_{today.month}'
df_list = []
names = [] # Add your class names here
outdir = args.directory
for file_name in tqdm(os.listdir(annotations_folder)):
if file_name.endswith('.txt'):
file_path = os.path.join(annotations_folder, file_name)
annotation_df = pd.read_csv(file_path, sep=' ', names=['espece', 'x', 'y', 'w', 'h'])
df = pd.concat({f:pd.read_csv(os.path.join(annots, f), sep=' ', names=['espece', 'x', 'y', 'w', 'h'])
for f in tqdm(os.listdir(annots))}, names=['file'])
annotation_df['file'] = file_name
annotation_df['idx'] = annotation_df['file'].str.split('_').str[-1].str.split('.').str[0]
annotation_df['file'] = annotation_df['file'].str.rsplit('.', 1).str[0] + '.wav'
df = df.reset_index(level=[0])
df = df.reset_index()
del df['index']
df['idx'] = df.file.str.split('_').str[-1].str.split('.').str[0]
df.file = df.file.str.rsplit('.',1).str[0]+'.wav'
annotation_df['annot'] = annotation_df['espece'].apply(lambda x: names[x])
DUREE_SPECTRO = args.duration
SR = args.SR
annotation_df['midl'] = (annotation_df['x'] * duration) + annotation_df['idx'].astype(int)
annotation_df['freq_center'] = (1 - annotation_df['y']) * (sr / 2)
annotation_df['freq_min'] = annotation_df['freq_center'] - (annotation_df['h'] * (sr / 2)) / 2
annotation_df['freq_max'] = annotation_df['freq_center'] + (annotation_df['h'] * (sr / 2)) / 2
annotation_df['start'] = annotation_df['midl'] - (annotation_df['w'] * duration) / 2
annotation_df['stop'] = annotation_df['midl'] + (annotation_df['w'] * duration) / 2
annotation_df['duration'] = annotation_df['stop'] - annotation_df['start']
#put the classes here
names = []
df_list.append(annotation_df)
df['annot'] = 'None'
for j in range (len(df)):
df.loc[j,('annot')] = names[int(df.espece.iloc[j])]
result_df = pd.concat(df_list, ignore_index=True)
result_df.to_csv(os.path.join(outdir, f'{out_file}.csv'), index=False)
print(f'Saved as {os.path.join(outdir, f"{out_file}.csv")}')
print('Calculating the positions','\n')
df['midl'] = (df.x*DUREE_SPECTRO)+(df.idx.astype(int))
df['freq_center'] = (1-df.y)*(SR/2)
df['freq_min'] = df.freq_center - (df.h*(SR/2))/2
df['freq_max'] = df.freq_center + (df.h*(SR/2))/2
df['start'] = df.midl - (df.w * DUREE_SPECTRO)/2
df['stop'] = df.midl + (df.w * DUREE_SPECTRO)/2
df['duration'] = df.stop - df.start
if __name__ == "__main__":
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='TODO')
parser.add_argument('-p', '--path_to_data', type=arg_directory, help='Path of the folder that contains the .txt files', required=True)
parser.add_argument('-d', '--directory', type=arg_directory, help='Directory where the dataframe will be stored', required=True)
parser.add_argument('-t', '--duration', type=int, help='Duration of the spectrogram', required=True)
parser.add_argument('-s', '--SR', type=int, help='Sampling Rate of the spectrogram')
args = parser.parse_args()
df.to_csv(os.path.join(outdir,str(out_file+'.csv')), index= False)
print('saved as ',os.path.join(outdir,str(out_file+'.csv')))
process_annotations(args.path_to_data, args.duration, args.SR)
......@@ -3,95 +3,41 @@ import pandas as pd
import librosa
import numpy as np
import matplotlib.pyplot as plt
from p_tqdm import p_map
import ipdb
import random
from datetime import date
import argparse
from p_tqdm import p_map
import cv2
import matplotlib.patches as patches
from matplotlib.patches import Rectangle
from random import randrange
from PIL import Image
from mycolorpy import colorlist as mcp
today = date.today()
def arg_directory(path):
    """Validate *path* for use as an argparse ``type=`` callable.

    Returns:
        The unchanged *path* when it names an existing directory.

    Raises:
        argparse.ArgumentTypeError: when *path* is not an existing directory.
    """
    # Guard clause: reject anything that is not a directory first.
    if not os.path.isdir(path):
        raise argparse.ArgumentTypeError(f'`{path}` is not a valid path')
    return path
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='TODO')
parser.add_argument('-f','--filename_path', type= str, help = 'Path and name of the file containing the annotations',required=True)
parser.add_argument('-p','--path_to_data', type=arg_directory, help = 'Path of the folder that contain the recordings',required=True)
parser.add_argument('-d','--directory', type=arg_directory, help = 'Directory to wich spectrograms and .txt files will be stored',required=True)
parser.add_argument('-m','--mode',type=str,choices=['uniform','personalized'],help = 'Choose the mode to calculate the y and height value',required=True)
parser.add_argument('-u','--unique',type=str, choices=['unique','multiple'], help = 'unique for only one spectrogram per file, multple for multiple spectrogram',required=True)
parser.add_argument('-c','--columns_name',type=str,help = 'Name of the column that contain the path',required=True)
parser.add_argument('--export',type=str, default=None, help='To export the position of the bounding box on the spectrogram',required=False)
args = parser.parse_args()
def process_annotations(file_path, duration, mode, unique, columns_name, export):
today = date.today()
directory = args.directory
df = pd.read_csv(file_path, low_memory=False)
df.rename(columns={'label': 'Code', 'annotation_initial_time': 'start', 'annotation_final_time': 'stop', 'duree': 'd_annot', 'min_frequency': 'min_freq', 'max_frequency': 'max_freq', 'avg_frequency': 'midl_y'}, inplace=True)
df['max_freq'].fillna(9000, inplace=True)
df['min_freq'].fillna(1000, inplace=True)
df['midl_y'].fillna(5000, inplace=True)
DURATION = 8
NB_CLASS = 5
df = pd.read_csv(args.filename_path, low_memory=False)
df.rename(columns={'label':'Code'},inplace=True)
df.rename(columns={'annotation_initial_time':'start'},inplace=True)
df.rename(columns={'annotation_final_time':'stop'},inplace=True)
df.rename(columns={'duree':'d_annot'},inplace=True)
df.rename(columns={'min_frequency':'min_freq'},inplace=True)
df.rename(columns={'max_frequency':'max_freq'},inplace=True)
df.rename(columns={'avg_frequency':'midl_y'},inplace=True)
tab = df.groupby('Code').count()
tab = tab.sort_values(tab.columns[0], ascending=False)[:NB_CLASS]
df = df[df.Code.isin(tab.index)]
try :
df['max_freq'].fillna(9000,inplace = True)
df['min_freq'].fillna(1000,inplace = True)
except Exception:
df['max_freq'] = 9000
df['min_freq'] = 1000
df['midl_y'] = 5000
df['d_annot'] = df.stop - df.start
df['midl'] = (df.stop + df.start) / 2
df['Path'] = df[args.columns_name]
df['Path'] = df[columns_name]
df = df[df.d_annot < 8]
df = df.reset_index()
list_espece = df.groupby('Code').count().sort_values(df.columns[0],ascending = False)
data = pd.DataFrame(columns = ['espece','ind'])
for i in range (len(list_espece)):
esp = list_espece.index[i]
new_col = pd.DataFrame([[esp,i]],columns = ['espece','ind'])
data = pd.concat([data,new_col])
liste_espece = data.espece
liste_espece.to_csv(str(directory+'liste_especes.csv'),index = False)
print('\n',data)
#color = mcp.gen_color(cmap = "Wistia", n= len(list_espece))
colors = pd.DataFrame(columns = ['color', 'species'])
for i in range (30):
r = randrange(255)
g = randrange(255)
b = randrange(255)
rand_color = (r, g, b)
new = pd.DataFrame([[rand_color, i]], columns = ['color', 'species'])
colors = pd.concat([colors, new])
# Add your class names to the 'colors' list
colors = [(random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)) for _ in range(30)]
def process(x):
count, (f, grp) = x
......@@ -99,29 +45,27 @@ def process(x):
duration = DURATION
while len(grp) != 0:
tab = grp[grp.midl <= grp.start.iloc[0] + 7]
fin = pd.DataFrame(columns=['id', 'x', 'y', 'width', 'height'])
duree = tab.d_annot
if len(tab) == 0:
tab = grp
print(tab)
rd = round(random.uniform(-1.5, 1.5), 2)
if args.unique == 'multiple':
if unique == 'multiple':
if tab.start.iloc[0] <= 3:
offset = 0
if tab.start.iloc[0] == 3:
elif tab.start.iloc[0] == 3:
offset = 1
if tab.start.iloc[0] >= 3:
elif tab.start.iloc[0] >= 3:
offset = tab.midl.iloc[0] - 3.5 + rd
elif args.unique == 'unique':
elif unique == 'unique':
offset = 0
window_size = 1024
window = np.hanning(window_size)
y, sr = librosa.load(filename, offset=offset, duration=duration, sr=None)
stft = librosa.core.spectrum.stft(y, n_fft=window_size, hop_length=512, window=window)
spectrum, freq, time, im = plt.specgram(y, Fs=sr, NFFT=window_size, noverlap=512, cmap='jet')
......@@ -130,7 +74,7 @@ def process(x):
vmax = np.flipud(np.log10(np.abs(stft))).max()
plt.close()
plt.imshow(np.flipud(np.log10(np.abs(stft))),aspect = "auto", interpolation = None, cmap = 'jet', vmin = vmin, vmax = vmax)
plt.imshow(np.flipud(np.log10(np.abs(stft)), aspect='auto', interpolation=None, cmap='jet', vmin=vmin, vmax=vmax))
plt.subplots_adjust(top=1, bottom=0, left=0, right=1)
for idxs, row in tab.iterrows():
......@@ -138,22 +82,21 @@ def process(x):
x_pxl = (row.midl - offset) / duration
width_pxl = (row.stop - row.start) / duration
if args.mode == 'uniform':
if mode == 'uniform':
height_pxl = 0.8
y_pxl = 0.5
else:
y_pxl = 1 - (row.midl_y / (sr / 2))
height_pxl = (row.max_freq - row.min_freq) / (sr / 2)
if height_pxl > 1:
height_pxl = 1
elif height_pxl > y_pxl * 2:
y_pxl = y_pxl + 0.5 * (height_pxl - y_pxl * 2)
annotation = pd.DataFrame([[str(data.loc[data.espece == row.Code,'ind'][0]),x_pxl,y_pxl,width_pxl,height_pxl]],columns = ['id','x', 'y', 'width', 'height'])
annotation = pd.DataFrame([[str(data.loc[data.espece == row.Code, 'ind'][0]), x_pxl, y_pxl, width_pxl, height_pxl]],
columns=['id', 'x', 'y', 'width', 'height'])
fin = pd.concat([fin, annotation])
grp = grp.drop(tab.index)
name = str(row.Path.replace('/', '_').replace('.', '_') + '_' + str(count))
......@@ -162,64 +105,65 @@ def process(x):
try:
plt.savefig(os.path.join(directory, str('images_' + str(today.day) + '_' + str(today.month)), row.Code, str(name + '.jpg')))
fin.to_csv(name_file, sep=' ', header=False, index=False)
plt.savefig(os.path.join(directory,str('images_'+str(today.day)+'_'+str(today.month)),'all',str(name+'.jpg')))
plt.savefig(os.path.join(directory, str('images_' + str(today.day) + '_' + str(today.month)), 'all',
str(name + '.jpg')))
except:
os.mkdir(os.path.join(directory, str('images_' + str(today.day) + '_' + str(today.month))))
for especes in list_espece.index:
os.mkdir(os.path.join(directory,str('images_'+str(today.day)+'_'+str(today.month)),especes))
os.mkdir(os.path.join(directory,str('images_'+str(today.day)+'_'+str(today.month)),'all'))
os.mkdir(os.path.join(directory, str('images_' + str(today.day) + '_' + str(today.month), especes)))
os.mkdir(os.path.join(directory, str('images_' + str(today.day) + '_' + str(today.month), 'all')))
os.mkdir(os.path.join(directory, str('labels_' + str(today.day) + '_' + str(today.month))))
fin.to_csv(name_file, sep=' ', header=False, index=False)
plt.savefig(os.path.join(directory,str('images_'+str(today.day)+'_'+str(today.month)),row.Code,str(name+'.jpg')))
plt.savefig(os.path.join(directory,str('images_'+str(today.day)+'_'+str(today.month)),'all',str(name+'.jpg')))
plt.savefig(os.path.join(directory, str('images_' + str(today.day) + '_' + str(today.month)), row.Code,
str(name + '.jpg')))
plt.savefig(os.path.join(directory, str('images_' + str(today.day) + '_' + str(today.month)), 'all',
str(name + '.jpg')))
plt.close()
if args.export != None:
im = cv2.imread(os.path.join(directory,str('images_'+str(today.day)+'_'+str(today.month)),'all',str(name+'.jpg')))
if export:
im = cv2.imread(
os.path.join(directory, str('images_' + str(today.day) + '_' + str(today.month)), 'all', str(name + '.jpg')))
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
H, W = im.shape[0], im.shape[1]
for l in range(len(fin)):
x, y, w, h = fin.x.iloc[l] * W, fin.y.iloc[l] * H, fin.width.iloc[l] * W, fin.height.iloc[l] * H
shape1 = (int(x-(0.5*w)), int(y+(0.5*h)))
shape2 = (int(x+(0.5*w)), int(y+(0.5*h)))
shape3 = (int(x+-(0.5*w)), int(y-(0.5*h)))
shape4 = (int(x+(0.5*w)), int(y-(0.5*h)))
#rectangle text shape
shape1 = (int(x - 0.5 * w), int(y + 0.5 * h))
shape2 = (int(x + 0.5 * w), int(y + 0.5 * h))
shape3 = (int(x - 0.5 * w), int(y - 0.5 * h))
shape4 = (int(x + 0.5 * w), int(y - 0.5 * h)
)
shp1 = shape4[0] - 10, shape4[1] + 20
shp2 = shape4[0], shape4[1] + 20
shp3 = shape4[0] - 10, shape4[1]
shp4 = shape4[0], shape4[1]
#text placement
text_shape = shp1[0], shp1[1] - 5
label = str(fin.id.iloc[l])
cv2.rectangle(im, pt1=shape1, pt2=shape4, color=colors[colors.species == label].color, thickness=1)
cv2.rectangle(im, pt1=shp1, pt2=shp4, color=colors[colors.species == label].color, thickness=-1)
cv2.putText(im, label, text_shape, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
plt.imshow(im)
plt.subplots_adjust(top=1, bottom=0, left=0, right=1)
try:
plt.savefig(os.path.join(directory, str('images_annotes_'+str(today.day)+'_'+str(today.month)),str(name+'.jpg')))
plt.savefig(
os.path.join(directory, str('images_annotes_' + str(today.day) + '_' + str(today.month), str(name + '.jpg'))))
except Exception:
os.mkdir(os.path.join(directory, str('images_annotes_' + str(today.day) + '_' + str(today.month))))
plt.savefig(os.path.join(directory, str('images_annotes_'+str(today.day)+'_'+str(today.month)),str(name+'.jpg')))
plt.savefig(os.path.join(directory, str('images_annotes_' + str(today.day) + '_' + str(today.month), str(name + '.jpg'))))
plt.close()
p_map(process, enumerate(df.groupby('Path')), num_cpus=2, total=len(df.groupby('Path')))
print('saved to', directory)
if __name__ == '__main__':
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='TODO')
parser.add_argument('-f', '--filename_path', type=str, help='Path and name of the file containing the annotations', required=True)
parser.add_argument('-p', '--path_to_data', type=arg_directory, help='Path of the folder that contains the recordings', required=True)
parser.add_argument('-d', '--directory', type=arg_directory, help='Directory to which spectrograms and .txt files will be stored', required=True)
parser.add_argument('-m', '--mode', type=str, choices=['uniform', 'personalized'], help='Choose the mode to calculate the y and height value', required=True)
parser.add_argument('-u', '--unique', type=str, choices=['unique', 'multiple'], help='unique for only one spectrogram per file, multiple for multiple spectrograms', required=True)
parser.add_argument('-c', '--columns_name', type=str, help='Name of the column that contains the path', required=True)
parser.add_argument('--export', type=str, default=None, help='To export the position of the bounding box on the spectrogram', required=False)
args = parser.parse_args()
process_annotations(args.filename_path, duration=8, mode=args.mode, unique=args.unique, columns_name=args.columns_name, export=args.export)
......@@ -5,7 +5,6 @@ from tqdm import tqdm
import numpy as np
import shutil
import argparse
import ipdb
def arg_directory(path):
if os.path.isdir(path):
......@@ -13,12 +12,17 @@ def arg_directory(path):
else:
raise argparse.ArgumentTypeError(f'`{path}` is not a valid path')
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='TODO')
parser.add_argument('-r','--ratio', type=float, default = 0.7,help = 'Train Ratio (val = 1 - ratio)')
parser.add_argument('-p','--path_to_data', type=arg_directory, help = 'Path of the folder that contain the .txt (ending with labels/)',required=True)
parser.add_argument('-d','--direction', type=arg_directory, help = 'Directory to wich spectrogram and .txt files will be stored (different from -p)',required=True)
args = parser.parse_args()
def create_directory_if_not_exists(directory):
    """Create *directory*, including missing parents, unless it already exists.

    Uses ``os.makedirs(..., exist_ok=True)`` instead of an ``os.path.exists``
    check followed by ``os.mkdir``: there is no window between the check and
    the creation, and nested paths such as ``images/train`` work even when
    the parent has not been created yet.

    Raises:
        FileExistsError: if *directory* exists but is not a directory.
    """
    os.makedirs(directory, exist_ok=True)
def copy_files_to_directory(file_list, source_dir, destination_dir, extension='.txt'):
    """Copy ``<name><extension>`` from *source_dir* to *destination_dir*.

    Args:
        file_list: Iterable of file names given without their extension.
        source_dir: Directory the files are read from.
        destination_dir: Directory the files are copied into.
        extension: Suffix, dot included, appended to every name. Defaults to
            ``'.txt'``, the value that used to be hard-coded, so existing
            calls behave as before. Pass ``'.jpg'`` to copy image files.

    Raises:
        FileNotFoundError: raised by ``shutil.copy2`` when a source file is
            missing.
    """
    for file_name in file_list:
        # The same base name is used on both sides; only the folder changes.
        target_name = f'{file_name}{extension}'
        shutil.copy2(os.path.join(source_dir, target_name),
                     os.path.join(destination_dir, target_name))
def process_data(args):
path = args.path_to_data
direction = args.direction
......@@ -60,36 +64,35 @@ train = pd.DataFrame(df.groupby('file').count().index,columns = ['file'])
val.file = val.file.str.rsplit('.', 1).str[0]
train.file = train.file.str.rsplit('.', 1).str[0]
isExist = os.path.exists(os.path.join(direction,'images'))
if not isExist:
os.mkdir(os.path.join(direction,'images'))
os.mkdir(os.path.join(direction,'images/train'))
os.mkdir(os.path.join(direction,'images/val'))
os.mkdir(os.path.join(direction,'labels'))
os.mkdir(os.path.join(direction,'labels/train'))
os.mkdir(os.path.join(direction,'labels/val'))
for i,row in tqdm(val.iterrows(), total=val.shape[0]):
shutil.copy2(os.path.join(path, str(row.file+'.txt')), os.path.join(direction, str('labels/val/'+row.file+'.txt')))
shutil.copy2(os.path.join(path, str('../images/all/'+row.file+'.jpg')), os.path.join(direction, str('images/val/'+row.file+'.jpg')))
create_directory_if_not_exists(os.path.join(direction, 'images'))
create_directory_if_not_exists(os.path.join(direction, 'images/train'))
create_directory_if_not_exists(os.path.join(direction, 'images/val'))
create_directory_if_not_exists(os.path.join(direction, 'labels'))
create_directory_if_not_exists(os.path.join(direction, 'labels/train'))
create_directory_if_not_exists(os.path.join(direction, 'labels/val'))
for i,row in tqdm(train.iterrows(), total=train.shape[0]):
copy_files_to_directory(val.file, path, os.path.join(direction, 'labels/val'))
copy_files_to_directory(val.file, os.path.join(path, '../images/all'), os.path.join(direction, 'images/val'))
shutil.copy2(os.path.join(path, str(row.file+'.txt')), os.path.join(direction, str('labels/train/'+row.file+'.txt')))
shutil.copy2(os.path.join(path, str('../images/all/'+row.file+'.jpg')), os.path.join(direction, str('images/train/'+row.file+'.jpg')))
copy_files_to_directory(train.file, path, os.path.join(direction, 'labels/train'))
copy_files_to_directory(train.file, os.path.join(path, '../images/all'), os.path.join(direction, 'images/train'))
try:
liste_espece = pd.read_csv(str(path+'../liste_especes.csv'))
liste_espece = pd.read_csv(os.path.join(path, '../liste_especes.csv'))
except Exception:
print('No species list detected, please add to ',str(direction+'custom_data.yaml'))
print('No species list detected, please add it to', os.path.join(direction, 'custom_data.yaml'))
with open(str(direction+'custom_data.yaml'),'w') as f:
f.write('train : '+direction+'images/train\n')
f.write('val : '+direction+'images/val\n')
f.write('nc : ' + str(len(liste_espece))+'\n')
try:
f.write('names : ' + str(liste_espece.espece.tolist()))
except Exception:
print('.yaml saved to ',str(direction+'custom_data.yaml'))
\ No newline at end of file
with open(os.path.join(direction, 'custom_data.yaml'), 'w') as f:
f.write(f'train: {os.path.join(direction, "images/train")}\n')
f.write(f'val: {os.path.join(direction, "images/val")}\n')
f.write(f'nc: {len(liste_espece)}\n')
f.write(f'names: {liste_espece.espece.tolist()}')
if __name__ == '__main__':
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='TODO')
parser.add_argument('-r', '--ratio', type=float, default=0.7, help='Train Ratio (val = 1 - ratio)')
parser.add_argument('-p', '--path_to_data', type=arg_directory, help='Path of the folder that contains the .txt (ending with labels/)', required=True)
parser.add_argument('-d', '--direction', type=arg_directory, help='Directory to which spectrogram and .txt files will be stored (different from -p)', required=True)
args = parser.parse_args()
process_data(args)
import pandas as pd
import os
import ipdb
from tqdm import tqdm
import argparse
from datetime import date
......@@ -11,18 +10,17 @@ def arg_directory(path):
else:
raise argparse.ArgumentTypeError(f'`{path}` is not a valid path')
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='TODO')
parser.add_argument('-p','--path_to_data', type=arg_directory, help = 'Path of the folder that contain the .txt files',required=True)
parser.add_argument('-d','--direction', type=arg_directory, help = 'Directory to wich the dataframe will be stored',required=True)
args = parser.parse_args()
def process_data(args):
annots = args.path_to_data
today = date.today()
out_file = str('YOLO_detection_'+str('_'+str(today.day)+'_'+str(today.month)))
out_file = f'YOLO_detection_{today.day}_{today.month}'
outdir = args.direction
# Define constants
DUREE_SPECTRO = 8
OVERLAP = 2
# Load and process data
df = pd.concat({f: pd.read_csv(os.path.join(annots, f), sep=' ', names=['espece', 'x', 'y', 'w', 'h', 'conf'])
for f in tqdm(os.listdir(annots))}, names=['file'])
......@@ -32,15 +30,19 @@ del df['index']
df['idx'] = df.file.str.split('_').str[-1].str.split('.').str[0]
df.file = df.file.str.rsplit('.', 1).str[0] + '.wav'
DUREE_SPECTRO = 8
OVERLAP = 2
#put the classes here
# Define class names
names = []
df['annot'] = 'None'
for j in range (len(df)):
df['annot'].iloc[j] = names[int(df.espece.iloc[j])]
df['annot'] = df['espece'].apply(lambda x: names[int(x)])
# Save the processed DataFrame to a CSV file
df.to_csv(os.path.join(outdir, f'{out_file}.csv'), index=False)
print(f'Saved as {os.path.join(outdir, f"{out_file}.csv")}')
if __name__ == "__main__":
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='TODO')
parser.add_argument('-p', '--path_to_data', type=arg_directory, help='Path of the folder that contains the .txt files', required=True)
parser.add_argument('-d', '--direction', type=arg_directory, help='Directory to which the dataframe will be stored', required=True)
args = parser.parse_args()
df.to_csv(os.path.join(outdir,str(out_file+'.csv')), index= False)
print('saved as ',os.path.join(outdir,str(out_file+'.csv')))
process_data(args)
import os
import json
import random
import base64
import shutil
import argparse
from pathlib import Path
from glob import glob
from pathlib import Path
def arg_directory(path):
if os.path.isdir(path):
......@@ -13,55 +12,61 @@ def arg_directory(path):
else:
raise argparse.ArgumentTypeError(f'`{path}` is not a valid path')
parser = argparse.ArgumentParser(description='Convert LabelMe annotations to YOLO compatible')
parser.add_argument('-p','--path_to_data',type=arg_directory,help='Path to LabelMe annotations')
parser.add_argument('-d','--directory',type=arg_directory,help='Directory to which YOLO annotations will be stored')
args = parser.parse_args()
# YOLO metadata and files
yolo_names = []
yolo_names_path = os.path.join(args.directory, 'custom.names')
# Convert image annotations
for index, labelme_annotation_path in enumerate(glob(f'{args.path_to_data}/*.json')):
image_id = os.path.basename(labelme_annotation_path).rstrip('.json')
labelme_annotation_file = open(labelme_annotation_path, 'r')
def convert_labelme_to_yolo(labelme_annotation_path, yolo_directory):
    """Convert one LabelMe annotation file into a YOLO label file and image.

    The image embedded in the LabelMe JSON (``imageData``, base64) is written
    to ``<yolo_directory>/images/<stem>.jpg``. One line per rectangle shape is
    written to ``<yolo_directory>/labels/<stem>.txt`` in the form
    ``<label> <x_center> <y_center> <width> <height>``, where the four numbers
    are divided by the image width/height so they are fractions of the image
    size. Shapes whose ``shape_type`` is not ``rectangle`` are reported on
    stdout and skipped.

    Args:
        labelme_annotation_path: Path to a LabelMe ``.json`` annotation file.
        yolo_directory: Output root. The ``labels`` and ``images`` sub-folders
            are created inside it when they do not exist yet.

    Raises:
        KeyError: If the JSON lacks ``imageData``, ``imageWidth``,
            ``imageHeight`` or ``shapes``, or a shape lacks an expected key.
    """
    # Load LabelMe annotation
    image_id = Path(labelme_annotation_path).stem
    with open(labelme_annotation_path, 'r') as labelme_annotation_file:
        labelme_annotation = json.load(labelme_annotation_file)

    # Make sure the output layout exists instead of failing on the first open().
    labels_directory = os.path.join(yolo_directory, 'labels')
    images_directory = os.path.join(yolo_directory, 'images')
    os.makedirs(labels_directory, exist_ok=True)
    os.makedirs(images_directory, exist_ok=True)

    # Write YOLO image
    yolo_image_path = os.path.join(images_directory, f'{image_id}.jpg')
    yolo_image_data = base64.b64decode(labelme_annotation['imageData'])
    with open(yolo_image_path, 'wb') as yolo_image_file:
        yolo_image_file.write(yolo_image_data)

    image_width = labelme_annotation['imageWidth']
    image_height = labelme_annotation['imageHeight']

    # Write YOLO image annotation
    yolo_annotation_path = os.path.join(labels_directory, f'{image_id}.txt')
    with open(yolo_annotation_path, 'w') as yolo_annotation_file:
        for shape in labelme_annotation['shapes']:
            if shape['shape_type'] != 'rectangle':
                print(f'Invalid type `{shape["shape_type"]}` in annotation `{labelme_annotation_path}`')
                continue

            label = shape['label']
            x1, y1 = shape['points'][0]
            x2, y2 = shape['points'][1]

            # abs(): the two corners may be stored in either order, depending
            # on the direction in which the rectangle was drawn.
            # Division by the image size: coordinates are written relative to
            # the image, not in pixels.
            width = abs(x2 - x1) / image_width
            height = abs(y2 - y1) / image_height
            x_center = (x1 + x2) / 2 / image_width
            y_center = (y1 + y2) / 2 / image_height

            # NOTE(review): the first field is the textual label, as before;
            # presumably it is mapped to a class index downstream - confirm.
            annotation_line = f'{label} {x_center} {y_center} {width} {height}\n'
            yolo_annotation_file.write(annotation_line)
def main(args):
    """Convert every LabelMe ``.json`` file and write the class-name list.

    Each ``*.json`` file found directly in ``args.path_to_data`` is converted
    with ``convert_labelme_to_yolo``. The labels of its rectangle shapes are
    then collected, in first-seen order and without duplicates, and written
    one per line to ``<args.directory>/custom.names``.

    Args:
        args: Parsed command-line namespace providing ``path_to_data`` (folder
            with the LabelMe annotations) and ``directory`` (output root).
    """
    # A dict is used as an insertion-ordered set: a plain set would make the
    # line order of custom.names (i.e. the class numbering) vary between runs.
    yolo_names = {}

    # Sorted so that the first-seen order does not depend on the file system.
    for labelme_annotation_path in sorted(glob(f'{args.path_to_data}/*.json')):
        convert_labelme_to_yolo(labelme_annotation_path, args.directory)

        with open(labelme_annotation_path, 'r') as labelme_annotation_file:
            labelme_annotation = json.load(labelme_annotation_file)

        for shape in labelme_annotation['shapes']:
            # Only rectangles end up in the label files, so only their labels
            # belong in the names list.
            if shape['shape_type'] == 'rectangle':
                yolo_names.setdefault(shape['label'])

    # Write YOLO names
    yolo_names_path = os.path.join(args.directory, 'custom.names')
    with open(yolo_names_path, 'w') as yolo_names_file:
        yolo_names_file.write('\n'.join(yolo_names))
if __name__ == "__main__":
    # Command-line entry point: both folders are mandatory and each value is
    # passed through arg_directory before main() receives the namespace.
    parser = argparse.ArgumentParser(
        description='Convert LabelMe annotations to YOLO compatible',
    )
    parser.add_argument(
        '-p', '--path_to_data',
        type=arg_directory,
        required=True,
        help='Path to LabelMe annotations',
    )
    parser.add_argument(
        '-d', '--directory',
        type=arg_directory,
        required=True,
        help='Directory to which YOLO annotations will be stored',
    )
    args = parser.parse_args()
    main(args)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment