Commit 79b22109 authored by Stephane Chavin

init

parent eab75395
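# =====================================================================
# Spectrogram export script. Reads a .csv that lists recordings (one
# path per row in a 'dir' column) and saves one spectrogram image per
# 8 s window, with a 2 s overlap, over the first 3 minutes of each file.
# A hedged example invocation (the script and folder names below are
# placeholders, not confirmed by the repository):
#   python print_spectro.py -f recordings -p /data/audio/
# =====================================================================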
import os
import argparse

import pandas as pd
import librosa
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter,
                                 description='Export spectrograms of the recordings listed in a .csv file')
parser.add_argument('-f', '--file', type=str, required=True,
                    help='Name of the .csv file (without extension) that lists the recordings to print')
parser.add_argument('-p', '--path_to_data', type=str, required=True,
                    help='Path of the folder that contains the recordings')
args = parser.parse_args()

path_to_data = args.path_to_data
folder = 'Spectrogramme/'
df = pd.read_csv(args.file + '.csv')
for i in tqdm(df.groupby('dir')):  # the 'dir' column holds the path of each recording
    count = 0
    # 30 windows of 8 s with a 2 s overlap = 180 s displayed over 30 images:
    # only the first 3 minutes of each recording are shown
    for j in range(30):
        filename = path_to_data + i[0]
        duration = 8
        if count == 0:
            offset = 0
        else:
            offset = offset + 6  # move the 8 s window forward by 6 s -> 2 s overlap
        try:
            y, sr = librosa.load(filename, offset=offset, duration=duration)
        except Exception:
            continue
        # Short-time Fourier transform with a 1024-sample Hann window
        window_size = 1024
        window = np.hanning(window_size)
        stft = librosa.core.spectrum.stft(y, n_fft=window_size, hop_length=512, window=window)
        log_stft = np.log10(np.abs(stft))
        plt.close()
        plt.figure()
        # Scale the colour map on the log-magnitude spectrogram
        vmin = log_stft.mean()
        vmax = log_stft.max()
        plt.imshow(np.flipud(log_stft), aspect='auto', interpolation=None,
                   cmap='jet', vmin=vmin, vmax=vmax)
        plt.subplots_adjust(top=1, bottom=0, left=0, right=1)
        name = i[0].replace('/', '_').split('.')[0] + '_' + str(count)
        try:
            plt.savefig(folder + name + '.jpg')
        except FileNotFoundError:
            os.mkdir(folder)
            plt.savefig(folder + name + '.jpg')
        count += 1
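# =====================================================================
# Annotation export script. Turns a .csv of annotations into 5 s
# spectrogram images plus one YOLO .txt per image (class id, x, y,
# width, height, all normalised to [0, 1]).
# A hedged example invocation (script and path names are placeholders):
#   python get_train_annot.py -f annotations.csv -p /data/audio/ -d /data/yolo/ -m personalized
# =====================================================================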
import os
import random
import argparse
from datetime import date

import pandas as pd
import librosa
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from tqdm import tqdm
from mycolorpy import colorlist as mcp
import ipdb

today = date.today()

parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter,
                                 description='Export annotated spectrograms and the matching YOLO .txt files')
parser.add_argument('-f', '--filename_path', type=str, required=True,
                    help='Path and name of the file containing the annotations')
parser.add_argument('-p', '--path_to_data', type=str, required=True,
                    help='Path of the folder that contains the recordings')
parser.add_argument('-d', '--direction', type=str, required=True,
                    help='Directory to which the spectrograms and .txt files are exported')
parser.add_argument('-m', '--mode', type=str, choices=['uniform', 'personalized'], required=True,
                    help='Mode used to compute the y and height values of the bounding boxes')
parser.add_argument('--export', type=str, default=None, required=False,
                    help='Show the position of the bounding boxes on the spectrograms')
args = parser.parse_args()

direction = args.direction
count = 0
df = pd.read_csv(args.filename_path, low_memory=False)
# Rename the columns to shorter names
df.rename(columns={'label': 'Code'}, inplace=True)                     # annotation label
df.rename(columns={'annotation_initial_time': 'start'}, inplace=True)  # onset (sec)
df.rename(columns={'annotation_final_time': 'stop'}, inplace=True)     # offset (sec)
df.rename(columns={'duree': 'd_annot'}, inplace=True)                  # duration
df.rename(columns={'min_frequency': 'min_freq'}, inplace=True)
df.rename(columns={'max_frequency': 'max_freq'}, inplace=True)
df.rename(columns={'avg_frequency': 'midl_y'}, inplace=True)
df['d_annot'] = df.stop - df.start
df['midl'] = (df.stop + df.start) / 2
df['Path'] = ''
df = df[df.d_annot < 7]  # discard annotations longer than 7 s
df = df.reset_index()

for w in range(len(df)):
    df.loc[w, 'Path'] = (args.path_to_data + df.code_unique[w] + '_split_'
                         + str(df.chunk_initial_time[w]) + '_' + str(df.chunk_final_time[w]))

# Sort the species by number of annotations and give each one a class index
list_espece = df.groupby('Code').count().sort_values(df.columns[0], ascending=False)
data = pd.DataFrame(columns=['espece', 'ind'])
for i in range(len(list_espece)):
    esp = list_espece.index[i]
    new_col = pd.DataFrame([[esp, i]], columns=['espece', 'ind'])
    data = pd.concat([data, new_col])
print('\n', data)
color = mcp.gen_color(cmap='Wistia', n=len(list_espece))

for f, grp in tqdm(df.groupby('Path')):
    filename = str(f)
    tqdm.write(filename)
    duration = 5  # CHOOSE THE DURATION of the spectrograms (seconds)
    while len(grp) != 0:
        # collect the annotations whose centre falls within 7 s of the first one
        tab = grp[grp.midl <= grp.start.iloc[0] + 7]
        fin = pd.DataFrame(columns=['id', 'x', 'y', 'width', 'height'])
        duree = tab.d_annot
        if len(tab) == 0:
            tab = grp
            print(tab)
        rd = round(random.uniform(-1.5, 1.5), 2)
        if False:  # disabled: random offset centred on the first annotation
            if tab.start.iloc[0] <= 3:
                offset = 0
            if tab.start.iloc[0] == 3:
                offset = 1
            if tab.start.iloc[0] >= 3:
                offset = tab.midl.iloc[0] - 3.5 + rd
        else:
            offset = 0
        y, sr = librosa.load(filename, offset=offset, duration=duration)
        # Draw the spectrogram once per chunk: STFT with a 1024-sample Hann window
        window_size = 1024
        window = np.hanning(window_size)
        stft = librosa.core.spectrum.stft(y, n_fft=window_size, hop_length=512, window=window)
        log_stft = np.log10(np.abs(stft))
        plt.close()
        plt.figure()
        vmin = log_stft.mean()
        vmax = log_stft.max()
        # specgram is only kept to recover the frequency and time axes
        a = plt.specgram(y, Fs=sr, NFFT=1024, noverlap=512, cmap='jet', vmin=vmin * 10, vmax=vmax * 10)
        plt.imshow(log_stft, aspect='auto', interpolation=None,
                   cmap='jet', vmin=vmin, vmax=vmax, origin='lower')
        freq = a[1]
        time = a[2]
        plt.subplots_adjust(top=1, bottom=0, left=0, right=1)
        for idxs, row in tab.iterrows():
            species = row.Code
            # Normalise the YOLO box to [0, 1]: x and width on the time axis,
            # y and height on the frequency axis
            x_pxl = (row.midl - row.chunk_initial_time) / duration
            width_pxl = (row.stop - row.start) / duration
            if args.mode == 'uniform':
                height_pxl = 0.8
                y_pxl = 0.4
            else:
                y_pxl = 1 - (row.midl_y / (sr / 2))
                height_pxl = (row.max_freq - row.min_freq) / (sr / 2)
            if height_pxl > 1:
                height_pxl = 1
            elif height_pxl > y_pxl * 2:
                y_pxl = y_pxl + 0.5 * (height_pxl - y_pxl * 2)
            annotation = pd.DataFrame([[str(data.loc[data.espece == row.Code, 'ind'].iloc[0]),
                                        x_pxl, y_pxl, width_pxl, height_pxl]],
                                      columns=['id', 'x', 'y', 'width', 'height'])
            fin = pd.concat([fin, annotation])
        grp = grp.drop(tab.index)
        name = row.Path.replace('/', '_').replace('.', '_') + '_' + str(count)
        img_dir = direction + 'images_annot_' + str(today.day) + '_' + str(today.month) + '/'
        txt_dir = direction + 'fichiers_annot_' + str(today.day) + '_' + str(today.month) + '/'
        # Create the export directories if needed, then save the image and its .txt
        os.makedirs(img_dir + row.Code, exist_ok=True)
        os.makedirs(img_dir + 'all', exist_ok=True)
        os.makedirs(txt_dir, exist_ok=True)
        fin.to_csv(txt_dir + name + '.txt', sep=' ', header=False, index=False)
        plt.savefig(img_dir + row.Code + '/' + name + '.jpg')
        plt.savefig(img_dir + 'all/' + name + '.jpg')
        if args.export is None:
            # Also export a copy of the spectrogram with the boxes drawn on it
            for l in range(len(fin)):
                t_minus = (row.d_annot * len(time)) / duration
                plt.imshow(np.flipud(log_stft), aspect='auto', interpolation=None,
                           cmap='jet', vmin=vmin, vmax=vmax)
                try:
                    plt.gca().add_patch(Rectangle(
                        ((fin.x.iloc[l] * len(time)) - t_minus,
                         (fin.y.iloc[l] * len(freq)) - 0.5 * fin.height.iloc[l] * len(freq)),
                        fin.width.iloc[l] * len(time), fin.height.iloc[l] * len(freq),
                        linewidth=3, edgecolor=color[int(fin.id.iloc[l])], facecolor='none'))
                except IndexError:
                    ipdb.set_trace()
            check_dir = direction + 'images_annotes_' + str(today.day) + '_' + str(today.month) + '/'
            os.makedirs(check_dir, exist_ok=True)
            plt.savefig(check_dir + name + '.jpg')
        plt.close()
        count += 1
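# =====================================================================
# Train/validation split script. Samples a validation set per species
# (starting from the rarest class), copies images and labels into
# images/{train,val} and labels/{train,val}, and writes a YOLOv5-style
# custom_data.yaml.
# A hedged example invocation (script and path names are placeholders):
#   python split_train_val.py -r 0.7 -p /data/yolo/txt/ -d /data/yolo/
# =====================================================================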
import os
import shutil
import argparse

import pandas as pd
from tqdm import tqdm

parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter,
                                 description='Split the annotations into a train and a validation set')
parser.add_argument('-r', '--ratio', type=float, default=0.7,
                    help='Train ratio (val = 1 - ratio)')
parser.add_argument('-p', '--path_to_data', type=str, required=True,
                    help='Path of the folder that contains the .txt annotation files')
parser.add_argument('-d', '--direction', type=str, required=True,
                    help='Directory to which the train/val images and labels are exported')
args = parser.parse_args()

path = str(args.path_to_data)
direction = str(args.direction)

df = pd.DataFrame(columns=['file', 'espece', 'x', 'y', 'w', 'h'])
for i in tqdm(os.listdir(path)):
    try:
        # the YOLO .txt files have no header: class id, x, y, width, height
        table = pd.read_csv(path + i, sep=' ', header=None,
                            names=['espece', 'x', 'y', 'w', 'h'])
    except Exception:
        continue
    table.insert(0, 'file', i)
    df = pd.concat([df, table])
df.reset_index(inplace=True)
tab = df.groupby('espece').count()

# Start with the rarest species so that every class keeps roughly the same ratio
nb_val = int(tab.file.min() * (1 - args.ratio))
esp_min = tab[tab.file == tab.file.min()].index[0]
val = df[df.file.isin(df[df.espece == esp_min].sample(nb_val).file)]
df.drop(val.index, inplace=True)
for n in tab.iterrows():
    if n[0] == esp_min:
        continue
    nb_val = len(df[df.espece == n[0]]) * (1 - args.ratio)
    new_row = df[df.file.isin(df[df.espece == n[0]].sample(int(nb_val)).file)]
    val = pd.concat((val, new_row))
    df = df.drop(new_row.index)

compte = val.groupby('espece').count()  # per-species counts of the validation set
val = pd.DataFrame(val.groupby('file').count().index, columns=['file'])
train = pd.DataFrame(df.groupby('file').count().index, columns=['file'])
# Strip the .txt extension from the file names
val['file'] = val['file'].str[:-4]
train['file'] = train['file'].str[:-4]
if not os.path.exists(direction + 'images'):
    os.mkdir(direction + 'images')
    os.mkdir(direction + 'images/train')
    os.mkdir(direction + 'images/val')
    os.mkdir(direction + 'labels')
    os.mkdir(direction + 'labels/train')
    os.mkdir(direction + 'labels/val')

for i, row in val.iterrows():
    shutil.copy2(path + 'labels/' + row.file + '.txt', direction + 'labels/val/' + row.file + '.txt')
    shutil.copy2(path + '../images/all/' + row.file + '.jpg', direction + 'images/val/' + row.file + '.jpg')
for i, row in train.iterrows():
    shutil.copy2(path + 'labels/' + row.file + '.txt', direction + 'labels/train/' + row.file + '.txt')
    shutil.copy2(path + '../images/all/' + row.file + '.jpg', direction + 'images/train/' + row.file + '.jpg')

# Write the dataset configuration (YOLOv5-style custom_data.yaml)
with open(direction + 'custom_data.yaml', 'w') as f:
    f.write('train : ' + direction + 'images/train\n')
    f.write('val : ' + direction + 'images/val\n')
    f.write('nc : ' + str(len(tab)) + '\n')
    f.write('names : ' + str(list(tab.index)))
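# =====================================================================
# Detection gathering script. Collects the per-image YOLO detection
# .txt files, rebuilds the original .wav name and chunk index from each
# file name, maps the class ids back to species codes and writes one
# .csv with all detections.
# A hedged example invocation (script and path names are placeholders):
#   python get_results.py -p /data/yolo/detections/ -d /data/results/
# =====================================================================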
import os
import argparse
from datetime import date

import pandas as pd
from tqdm import tqdm

parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter,
                                 description='Gather the YOLO detections into a single .csv file')
parser.add_argument('-p', '--path_to_data', type=str, required=True,
                    help='Path of the folder that contains the detection .txt files')
parser.add_argument('-d', '--direction', type=str, required=True,
                    help='Directory to which the resulting .csv file is exported')
args = parser.parse_args()

annots = str(args.path_to_data)
today = date.today()
model_name = ''  # put the model name here
out_file = 'YOLO_detection_' + model_name + '_' + str(today.day) + '_' + str(today.month)
outdir = str(args.direction)
df = pd.DataFrame(columns=['file', 'idx', 'espece', 'x', 'y', 'w', 'h', 'conf'])
for i in tqdm(os.listdir(annots)):
    if i == 'list_results_YOLO.csv':
        continue
    # the detection .txt files have no header: class id, x, y, width, height, confidence
    table = pd.read_csv(annots + i, sep=' ', header=None,
                        names=['espece', 'x', 'y', 'w', 'h', 'conf'])
    # Rebuild the original .wav name: drop the trailing chunk index
    # ('_0' to '_999') from the file name
    name = i.split('.')[0]
    if len(name.split('_')[-1]) == 2:
        name = name[0:-3]
    elif len(name.split('_')[-1]) == 3:
        name = name[0:-4]
    else:
        name = name[0:-2]
    name = name + '.wav'
    idx = i.split('_')[-1].split('.')[0]  # chunk index of the detection
    table.insert(0, 'idx', idx)
    table.insert(0, 'file', name)
    df = pd.concat([df, table])
# Put the classes here, in the same order as in the training configuration
names = ['wtsp', 'coye', 'oven', 'swsp', 'rwbl', 'nawa', 'heth', 'alfl', 'mawa', 'btnw', 'yrwa',
         'veer', 'swth', 'rcki', 'btbw', 'amre', 'sosp', 'lisp', 'gcki', 'revi', 'blja']
# Map each class id back to its species code
df['annot'] = [names[int(k)] for k in df.espece]
df.to_csv(outdir + out_file + '.csv', index=False)
"""
Converts LabelMe annotations to annotations compatible with YOLO.
The script does the following:
- cleans (!) the output directory and prepare it for training,
- splits the dataset on validation and training,
- converts all LabelMe annoations (*.json) to YOLO annoations (*.txt) and
- creates YOLO metadata (`.data`, `.names`, `train.txt` and `valid.txt`)
"""
import os
import json
import random
import base64
import shutil
import argparse
from pathlib import Path
from glob import glob
# Parse arguments
def arg_directory(path):
    if os.path.isdir(path):
        return path
    raise argparse.ArgumentTypeError(f'`{path}` is not a valid path')
parser = argparse.ArgumentParser(
    description='Convert LabelMe annotations to YOLO compatible annotations'
)
parser.add_argument('--input',
                    type=arg_directory,
                    help='Directory of the LabelMe annotations',
                    default=os.path.join(str(Path.home()), 'Pictures', 'Webcam')
                    )
parser.add_argument('--output',
                    type=arg_directory,
                    help='Directory to which the YOLO annotations will be stored',
                    default='./yolo'
                    )
parser.add_argument('--ratio',
                    type=float,
                    help='Training ratio',
                    default=0.9
                    )
args = parser.parse_args()
# YOLO metadata and files
yolo_names = []
yolo_train_dir_path = os.path.join(args.output, 'train')
yolo_valid_dir_path = os.path.join(args.output, 'valid')
yolo_backup_dir_path = os.path.join(args.output, 'backup')
yolo_list_file_train_path = os.path.join(args.output, 'train.txt')
yolo_list_file_valid_path = os.path.join(args.output, 'valid.txt')
yolo_data_path = os.path.join(args.output, 'custom.data')
yolo_names_path = os.path.join(args.output, 'custom.names')
# Prepare output directory (clears any previous train/valid split)
shutil.rmtree(yolo_train_dir_path, ignore_errors=True)
os.mkdir(yolo_train_dir_path)
shutil.rmtree(yolo_valid_dir_path, ignore_errors=True)
os.mkdir(yolo_valid_dir_path)
if not os.path.isdir(yolo_backup_dir_path):
    os.mkdir(yolo_backup_dir_path)
# Convert image annotations
yolo_list_file = {
    'train': open(yolo_list_file_train_path, 'w'),
    'valid': open(yolo_list_file_valid_path, 'w')
}
for index, labelme_annotation_path in enumerate(glob(f'{args.input}/*.json')):
    # os.path.splitext avoids the rstrip('.json') pitfall, which would also
    # strip trailing 'j', 's', 'o' or 'n' characters from the stem
    image_id = os.path.splitext(os.path.basename(labelme_annotation_path))[0]
    train_or_valid = 'train' if random.random() < args.ratio else 'valid'
    with open(labelme_annotation_path, 'r') as labelme_annotation_file:
        labelme_annotation = json.load(labelme_annotation_file)
    yolo_annotation_path = os.path.join(args.output, train_or_valid, image_id + '.txt')
    yolo_annotation_file = open(yolo_annotation_path, 'w')
    # The image itself is stored base64-encoded inside the LabelMe .json
    yolo_image = base64.decodebytes(labelme_annotation['imageData'].encode())
    yolo_image_path = os.path.join(args.output, train_or_valid, image_id + '.jpg')
    # Write YOLO image (and add it to the train or valid list)
    yolo_image_file = open(yolo_image_path, 'wb')
    yolo_image_file.write(yolo_image)
    yolo_image_file.close()
    yolo_list_file[train_or_valid].write(f'{yolo_image_path}{os.linesep}')
    # Write YOLO image annotation
    for shape in labelme_annotation['shapes']:
        if shape['shape_type'] != 'rectangle':
            print(f'Invalid type `{shape["shape_type"]}` in annotation `{labelme_annotation_path}`')
            continue
        if shape['label'] not in yolo_names:
            yolo_names.append(shape['label'])
        points = shape['points']
        scale_width = 1.0 / labelme_annotation['imageWidth']
        scale_height = 1.0 / labelme_annotation['imageHeight']
        # YOLO boxes: centre x, centre y, width, height, all normalised to [0, 1]
        width = abs(points[1][0] - points[0][0]) * scale_width
        height = abs(points[1][1] - points[0][1]) * scale_height
        x = (abs(points[1][0] + points[0][0]) / 2) * scale_width
        y = (abs(points[1][1] + points[0][1]) / 2) * scale_height
        # Note: this writes the label string itself; standard YOLO expects the
        # numeric id, i.e. yolo_names.index(shape['label'])
        object_class = shape['label']
        yolo_annotation_file.write(f'{object_class} {x} {y} {width} {height}\n')
    yolo_annotation_file.close()
yolo_list_file['train'].close()
yolo_list_file['valid'].close()
# Write YOLO names
yolo_names_file = open(yolo_names_path, 'w')
yolo_names_file.write(os.linesep.join(yolo_names))
yolo_names_file.close()
# Write YOLO data configuration
yolo_data_file = open(yolo_data_path, 'w')
yolo_data_file.write(f'classes = {len(yolo_names)}{os.linesep}')
yolo_data_file.write(f'train = {yolo_list_file_train_path}{os.linesep}')
yolo_data_file.write(f'valid = {yolo_list_file_valid_path}{os.linesep}')
yolo_data_file.write(f'names = {yolo_names_path}{os.linesep}')
yolo_data_file.write(f'backup = {yolo_backup_dir_path}{os.linesep}')
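# For reference, the custom.data written above looks roughly like this
# (an illustrative sketch, not real output; values depend on the dataset):
#   classes = 5
#   train = ./yolo/train.txt
#   valid = ./yolo/valid.txt
#   names = ./yolo/custom.names
#   backup = ./yolo/backup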