diff --git a/custom_hyp.yaml b/custom_hyp.yaml index cebb06f322e9174a14b5fb85c093bcb7d50af333..3e78301a5764d142b4ed182b39e5e1a26396993e 100644 --- a/custom_hyp.yaml +++ b/custom_hyp.yaml @@ -29,3 +29,5 @@ fliplr: 0.0 # image flip left-right (probability) mosaic: 0.0 # image mosaic (probability) mixup: 0.3 # image mixup (probability) copy_paste: 0.1 # segment copy-paste (probability) +ToGray: 0.0 # image in Grayscale (probability) +GaussNoise: 0.0 # adding Gaussian/white noise (probability) diff --git a/get_spectrogram.py b/get_spectrogram.py index 556ce00cfd53e70dae72f427370d93caea20761c..767ebe72b5a8b2bd1d713b29741526926a117c0d 100755 --- a/get_spectrogram.py +++ b/get_spectrogram.py @@ -51,12 +51,14 @@ def main(data, arguments): name = os.path.join(arguments.directory, folder, f'{file}_{offset}') utils.create_spectrogram( - sig, arguments.directory, name, window_size=arguments.window, - overlap=arguments.hop) - return folder + sig, arguments.directory, name, window_size=arguments.window, + overlap=arguments.hop) + except Exception as error: + folder = 'spectrograms' print(f'`{filename}` cannot be open : {error}') + return folder if __name__ == "__main__": parser = argparse.ArgumentParser( @@ -67,8 +69,8 @@ if __name__ == "__main__": help='Directory to which spectrograms will be stored') parser.add_argument('--duration', type=int, help='Duration for each spectrogram', default=8) - parser.add_argument( - '--window', type=int, help='Window size for the Fourier Transform', default=1024) + parser.add_argument('--window', type=int, help='Window size for the Fourier Transform', + default=1024) parser.add_argument('--hop', type=float, help='Ratio of hop in window : 50%% = 0.5', default=0.5) parser.add_argument('--high', type=int, diff --git a/get_train_annot.py b/get_train_annot.py index fa7f0834b15d1f8146dcd90eaad087f884e0430d..9c1749637ceed310001cec5bcfadff538440dd8d 100755 --- a/get_train_annot.py +++ b/get_train_annot.py @@ -3,18 +3,15 @@ import argparse import 
random import os -import pandas as pd +import sys import numpy as np -import matplotlib.pyplot as plt -import soundfile as sf - import cv2 -import sys -import subprocess import utils - +import matplotlib.pyplot as plt +import soundfile as sf from p_tqdm import p_map from tqdm import tqdm +import pandas as pd def process(entry, arguments, species_list): @@ -63,13 +60,13 @@ def process(entry, arguments, species_list): sig, fs = sf.read(filename, start=int( offset*fs), stop=int((offset+arguments.duration)*fs), always_2d=True) # Load the signal sig = sig[:, 0] # Only take channel 0 - if arguments.rf == None: + if not arguments.rf: arguments.rf = fs # Apply resample and low/high pass filter sig = utils.signal_processing( sig, rf=arguments.rf, fs=fs, high=arguments.high, low=arguments.low) fig = utils.create_spectrogram( - sig, arguments.directory, names=None, + sig, arguments.directory, names=None, window_size=arguments.window, overlap=arguments.hop) @@ -105,13 +102,14 @@ def process(entry, arguments, species_list): for folder in ['images', 'labels', 'images/all', 'annotated_images']: utils.create_directory(os.path.join( arguments.directory, folder)) - for specie_num in species_list: + for specie_num in species_list[species_list.columns[0]]: utils.create_directory(os.path.join( arguments.directory, 'images', str(specie_num))) # Save the images and annotation plt.savefig(os.path.join(arguments.directory, - 'images', str(species_list[species_list.species == - specie].index[0]), f'{name}.jpg')) + 'images', + str(species_list[species_list.species ==specie].species.iloc[0]), + f'{name}.jpg')) annotation.to_csv(name_file, sep=' ', header=False, index=False) plt.savefig(os.path.join(arguments.directory, 'images', 'all', f'{name}.jpg')) @@ -167,13 +165,15 @@ if __name__ == '__main__': parser.add_argument('--overlap', type=int, help='Overlap in seconds between 2 spectrograms', default=0) parser.add_argument( - '--rf', type=int, help='Frequency Resampling ', default=None) + 
'--rf', type=int, help='Frequency Resampling', default=None) parser.add_argument( - '--window', type=int, help='Window size for the Fourier Transform', default=1024) + '--window', type=int, help='Window size for the Fourier Transform', + default=1024) parser.add_argument( '--hop', type=float, help='Ratio of hop in window : 50%% = 0.5', default=.5) parser.add_argument( - '--cpu', type=int, help='To speed up the process, write 2 or more', default=1) + '--cpu', type=int, help='To speed up the process, write 2 or more', + default=1) parser.add_argument('--high', type=int, help='High Pass Filter value in Hz', default=10) parser.add_argument('--low', type=int, @@ -185,13 +185,14 @@ if __name__ == '__main__': # Load the data and put it into a DataFrame df = utils.open_file(args.filename_path) - suffix = input('Which suffix for your recording data? [wav, WAV, Wav, flac, mp3] : ') + suffix = input('Which suffix for your recording data? [wav, WAV, Wav, flac, mp3, <other>] : ') + if len(df.columns) == 1: final = [] for file, _ in df.groupby('Path'): new_df = utils.open_file(file) if len(new_df) >= 1: - new_df['Path'] = os.path.join(args.path_to_data, str(file.split('.Table')[0]+f'.{suffix}')) + new_df['Path'] = os.path.join(args.path_to_data, str(file.split('/')[-1].split('.Table')[0]+f'.{suffix}')) final.append(new_df) else: continue @@ -205,7 +206,6 @@ if __name__ == '__main__': species.to_csv(os.path.join( args.directory, 'species_list.csv'), index=False) - if args.cpu == 1: for i in tqdm(enumerate(df.groupby('Path')), total=len(df.groupby('Path')), desc="Processing", ascii='░▒▓█'): @@ -234,23 +234,10 @@ if __name__ == '__main__': directory_path = os.path.join(path, args.directory, 'set') # Create the directory path if not exists utils.create_directory(directory_path) - try : + try : + print(f'Train saved in {directory_path}\n') # Run the split command os.system(f'{sys.executable} {script} {data_path} {directory_path} -r 0.7 --test') - - print(f'Train saved in 
{directory_path}\n') - print('To train your model, use the following command : \n') - - yolo_path = os.path.join(path, 'yolov5/train.py') - data_path = os.path.join(directory_path, 'custom_data.yaml') - weights_path = os.path.join(path, 'yolov5/weights/yolov5l.pt') - hyp_path = os.path.join(path, 'custom_hyp.yaml') - - command = f'python {yolo_path} --data {data_path} --imgsz 640 --epochs 100 --weights {weights_path} --hyp {hyp_path} --cache' - print(command,'\n') - print('\u26A0\uFE0F Be aware that it is recommended to have background images that', - ' represents 10% of your dataset. To do so, please use the script "get_spectrogram.py"', - ' with --background arguments. Comptue on recordings that contains multiple type of noise...') except Exception as error: print(error) diff --git a/get_train_val.py b/get_train_val.py index de6d9a637b3ef979f2e3f47d5b4292388573fd49..0b8f59407190b076b699f59b496f3bcf3e80e1e0 100755 --- a/get_train_val.py +++ b/get_train_val.py @@ -123,7 +123,7 @@ if __name__ == '__main__': command = f'python {yolo_path} --data {data_path} --imgsz 640 --epochs 100 --weights {weights_path} --hyp {hyp_path} --cache' print(command,'\n') - print('\u26A0\uFE0F Be aware that it is recommended to have background images that', + print('\u26A0\uFE0F Be aware that it is recommended to have background images that', 'represents 10% of your dataset. To do so, please use the script "get_spectrogram.py"', 'with --background arguments. Comptue on recordings that contains multiple type of noise...') \ No newline at end of file diff --git a/utils.py b/utils.py index 947e5bb1dd4b7f6b946cc63620bc6c092ab7c9d9..4918af208ea377a9c00ef0c760fd903e06aae74f 100755 --- a/utils.py +++ b/utils.py @@ -66,7 +66,7 @@ def signal_processing(sig, rf, fs, high=None, low=None): :return array: Processed signal. 
""" # Check if resampling frequency is different than sampling frequency - if rf == None: + if not rf: rf = fs if rf != fs: sig = signal.resample(sig, int(len(sig) * rf / fs) @@ -97,7 +97,11 @@ def create_spectrogram(sig, directory, names, window_size=1024, overlap=.5): :param directory (str): Path to save the spectrogram. :param filename (str): Name of the final spectrogram. """ - overlap_size = int(window_size * overlap) + if overlap >= 1: + print(f'You put a hop value over 1. This has been corrected to have {overlap} as overlap size between window') + overlap_size = overlap + else: + overlap_size = int(window_size * overlap) stft = librosa.stft(sig, n_fft=window_size, hop_length=overlap_size, window='hann') # Compute the STFT diff --git a/yolov5/data/hyps/hyp.Objects365.yaml b/yolov5/data/hyps/hyp.Objects365.yaml index 74971740f7c73bf661950f339792b790a26b2b1c..f5a1ce15e30601614a23c367e378d089d1b21d7e 100755 --- a/yolov5/data/hyps/hyp.Objects365.yaml +++ b/yolov5/data/hyps/hyp.Objects365.yaml @@ -32,3 +32,5 @@ fliplr: 0.5 mosaic: 1.0 mixup: 0.0 copy_paste: 0.0 +ToGray: 0.0 # image in Grayscale (probability) +GaussNoise: 0.0 # adding Gaussian/white noise (probability) diff --git a/yolov5/data/hyps/hyp.VOC.yaml b/yolov5/data/hyps/hyp.VOC.yaml index 0aa4e7d9f8f5162653e3999b04b4636b103c355f..f0a1fef4e986f93ae4d06a1237a4bd5bff07edfa 100755 --- a/yolov5/data/hyps/hyp.VOC.yaml +++ b/yolov5/data/hyps/hyp.VOC.yaml @@ -38,3 +38,5 @@ mosaic: 0.85834 mixup: 0.04266 copy_paste: 0.0 anchors: 3.412 +ToGray: 0.0 +GaussNoise: 0.0 diff --git a/yolov5/data/hyps/hyp.scratch-high.yaml b/yolov5/data/hyps/hyp.scratch-high.yaml index 123cc8407413e9c130e21a3b5dd8ed33a3632db5..dcde050506de265c2f6e147bd6fa485ac63ecc68 100755 --- a/yolov5/data/hyps/hyp.scratch-high.yaml +++ b/yolov5/data/hyps/hyp.scratch-high.yaml @@ -32,3 +32,5 @@ fliplr: 0.5 # image flip left-right (probability) mosaic: 1.0 # image mosaic (probability) mixup: 0.1 # image mixup (probability) copy_paste: 0.1 # segment 
copy-paste (probability) +ToGray: 0.0 # image in Grayscale (probability) +GaussNoise: 0.0 # adding Gaussian/white noise (probability) diff --git a/yolov5/data/hyps/hyp.scratch-low.yaml b/yolov5/data/hyps/hyp.scratch-low.yaml index b9ef1d55a3b6ec8873ac87d6f4aa0ca081868bd6..1c65347a5f6d0dc908df5096ff697fe5cfe7e15b 100755 --- a/yolov5/data/hyps/hyp.scratch-low.yaml +++ b/yolov5/data/hyps/hyp.scratch-low.yaml @@ -32,3 +32,5 @@ fliplr: 0.5 # image flip left-right (probability) mosaic: 1.0 # image mosaic (probability) mixup: 0.0 # image mixup (probability) copy_paste: 0.0 # segment copy-paste (probability) +ToGray: 0.0 # image in Grayscale (probability) +GaussNoise: 0.0 # adding Gaussian/white noise (probability) diff --git a/yolov5/data/hyps/hyp.scratch-med.yaml b/yolov5/data/hyps/hyp.scratch-med.yaml index d6867d7557bac73db7f8787db60cff4c4c64b440..75752655527c00629ac0a328c6e3170331b5fe78 100755 --- a/yolov5/data/hyps/hyp.scratch-med.yaml +++ b/yolov5/data/hyps/hyp.scratch-med.yaml @@ -32,3 +32,5 @@ fliplr: 0.5 # image flip left-right (probability) mosaic: 1.0 # image mosaic (probability) mixup: 0.1 # image mixup (probability) copy_paste: 0.0 # segment copy-paste (probability) +ToGray: 0.0 # image in Grayscale (probability) +GaussNoise: 0.0 # adding Gaussian/white noise (probability) diff --git a/yolov5/detect.py b/yolov5/detect.py index fddf942e43c78cf37f1a456124c8475f515be6f0..47d0fb4669e92e2d5dfc6706463a905f5710d4a0 100755 --- a/yolov5/detect.py +++ b/yolov5/detect.py @@ -74,6 +74,8 @@ def run( rf=22050, window=1024, hop=0.5, + low=None, + high=None, visualize=False, # visualize features update=False, # update all models project=ROOT / 'runs/detect', # save results to project/name @@ -97,10 +99,15 @@ def run( project_name = input('Please enter the name of your project : ') date_now = date.today().strftime("%Y%m%d") + if sound: - name = '_'.join([project_name, 'detect', date_now, weights[0].split('/')[-3], 'conf', str(conf_thres).replace('.','_'), str(rf), 
str(sampleDur), str(window), str(int(window*hop)),'']) + name = '_'.join([project_name, 'detect', date_now, weights[0].split('/')[-3], 'conf', + str(conf_thres).replace('.','_'), str(rf), str(sampleDur), str(window), + str(int(window*hop)),'low',str(int(low)), 'high', str(int(high)),'']) else: - name = '_'.join([project_name, 'detect', date_now, weights[0].split('/')[-3], 'conf', str(conf_thres).replace('.','_'),'']) + name = '_'.join([project_name, 'detect', date_now, weights[0].split('/')[-3], 'conf', + str(conf_thres).replace('.','_'),'']) + print(f'\nYour train results will be saved as {name}\n') # Directories @@ -122,8 +129,12 @@ def run( elif screenshot: dataset = LoadScreenshots(source, img_size=imgsz, stride=stride, auto=pt) elif sound: - hop = window * hop - dataset = LoadSpectros(source, sampleDur, rf, window, hop, img_size=imgsz, stride=stride, auto=pt) + if hop > 1: + hop = hop + print(f'You put hop > 1, this has been corrected by putting hop at {hop}') + else: + hop = window * hop + dataset = LoadSpectros(source, sampleDur, rf, window, hop, low, high, img_size=imgsz, stride=stride, auto=pt) else: dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride) vid_path, vid_writer = [None] * bs, [None] * bs @@ -260,6 +271,8 @@ def parse_opt(): parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels') parser.add_argument('--sampleDur', default=8, help="Duration for each spectrogram for detection",type=int) parser.add_argument('--rf', default=22050, help="Resampling Frequency",type=int) + parser.add_argument('--low', default=None, help="Low pass filter",type=int) + parser.add_argument('--high', default=None, help="High pass filter",type=int) parser.add_argument('--window', default=1024, help="Window size for each spectrogram for detection",type=int) parser.add_argument('--hop', default=0.5, help="Hop lenght for each spectrogram for detection",type=float) parser.add_argument('--hide-conf', 
default=False, action='store_true', help='hide confidences') diff --git a/yolov5/train.py b/yolov5/train.py index ce470f67a98bed3cc0103916bf9f001d508cf52c..ee3be00d73ba66fe2b64842ff87633f6c66a091f 100755 --- a/yolov5/train.py +++ b/yolov5/train.py @@ -82,6 +82,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio if isinstance(hyp, str): with open(hyp, errors='ignore') as f: hyp = yaml.safe_load(f) # load hyps dict + LOGGER.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items())) opt.hyp = hyp.copy() # for saving hyps to checkpoints diff --git a/yolov5/utils/dataloaders.py b/yolov5/utils/dataloaders.py index e0b047d5ecfe6a19407a5c721a0bd38703b68140..dad59ef8068f583e7481cb744fc4e2f64f8c5598 100755 --- a/yolov5/utils/dataloaders.py +++ b/yolov5/utils/dataloaders.py @@ -16,8 +16,9 @@ from itertools import repeat from multiprocessing.pool import Pool, ThreadPool from pathlib import Path from threading import Thread -import soundfile as sf from urllib.parse import urlparse +import librosa +import soundfile as sf import matplotlib.pyplot as plt from scipy import signal import numpy as np @@ -238,8 +239,8 @@ class LoadScreenshots: return str(self.screen), im, im0, None, s # screen, img, original img, im0s, s class LoadSpectros: - def __init__(self, folder, sampleDur, rf, window, hop, img_size, stride=32, auto=True): - self.folder, self.sampleDur, self.rf, self.window, self.hop, self.img_size, self.stride, self.auto = folder, sampleDur, rf, window, hop, img_size, stride, auto + def __init__(self, folder, sampleDur, rf, window, hop, low, high, img_size, stride=32, auto=True): + self.folder, self.sampleDur, self.rf, self.window, self.hop, self.low, self.high, self.img_size, self.stride, self.auto = folder, sampleDur, rf, window, hop, low, high, img_size, stride, auto self.files = os.listdir(folder) self.mode = 'image' self.samples = [] @@ -271,10 +272,26 @@ class LoadSpectros: if fs != self.rf: sig = 
signal.resample(sig, int(len(sig)*self.rf/fs)) fig = plt.figure() + + if self.low: + # Create low pass filter + low_pass = signal.butter(1, self.low / (self.rf / 2), 'lp', output='sos') + sig = signal.sosfilt(low_pass, sig) # Apply low pass filter + if self.high: + # Create high pass filter + high_pass = signal.butter(2, self.high / (self.rf / 2), 'hp', output='sos') + sig = signal.sosfilt(high_pass, sig) # Apply high pass filter + + if self.hop > 1: + hop = int(self.hop) + else: + hop = int(self.window * self.hop) + stft = librosa.stft(sig, n_fft=self.window, - hop_length=self.hop, window='hann') # Compute the STFT + hop_length=hop, window='hann') # Compute the STFT stft = np.log10(np.abs(stft)) - axim = plt.imshow(stft, aspect = "auto", interpolation = None, cmap = 'viridis', vmin=np.mean(stft)) + axim = plt.imshow(stft, aspect = "auto", interpolation = None, + cmap = 'viridis', vmin=np.mean(stft)) plt.subplots_adjust(top=1, bottom=0, left=0, right=1) im0 = axim.make_image(fig.canvas)[0][:,:,:-1][:,:,::-1] cv2.imwrite(path, im0) @@ -510,7 +527,8 @@ class LoadImagesAndLabels(Dataset): self.mosaic_border = [-img_size // 2, -img_size // 2] self.stride = stride self.path = path - self.albumentations = Albumentations(size=img_size) if augment else None + self.albumentations = Albumentations(pToGray=self.hyp['ToGray'], + pGaussNoise=self.hyp['GaussNoise'], size=img_size) if augment else None try: f = [] # image files