"""Extracts spectrograms from multiple recordings""" import os import argparse import numpy as np import soundfile from p_tqdm import p_map from tqdm import tqdm import utils import pandas as pd def main(data, arguments): """ Load the data and compute n spectrograms with a specific duration and save it into a folder. :param data (DataFrame): DataFrame containing the path of each file to process. :param arguments (args): Arguments for signal processing and directory. """ _, (i) = data filename = str(i[0]) # Store the filename of the recording try: info = soundfile.info(filename) # Collection recording information file_duration, fs = info.duration, info.samplerate except Exception as error: print(f'`{filename}` cannot be open : : {error}') return # Create the list of all possible offset to compute spectrogram offset_list = np.arange(0, file_duration, arguments.duration - arguments.overlap) for offset in offset_list: file = filename.replace('/', '_').split('.', maxsplit=1)[0] try: sig, fs = soundfile.read(filename, start=int( offset*fs), stop=int((offset+arguments.duration)*fs), always_2d=True) # Load the signal sig = sig[:, 0] # Only take channel 0 # Apply resample and low/high pass filter sig = utils.signal_processing( sig, rf=arguments.rf, fs=fs, high=arguments.high, low=arguments.low) # Check if empty .txt annotation file is needed if arguments.background: folder = 'background' utils.create_directory(os.path.join(arguments.directory, folder)) name = os.path.join(arguments.directory, folder, f'{file}_{offset}') empty_dataframe = pd.DataFrame(columns=['specie', 'x', 'y', 'w', 'h']) empty_dataframe.to_csv(str(name+'.txt'), sep=' ', index=False, header=False) else: folder = 'spectrograms' utils.create_directory(os.path.join(arguments.directory, folder)) name = os.path.join(arguments.directory, folder, f'{file}_{offset}') utils.create_spectrogram( sig, arguments.directory, name, arguments.cmap, arguments.vmin, window_size=int(arguments.window), overlap=arguments.hop) except Exception as error: folder = 'spectrograms' print(f'`{filename}` cannot be open : {error}') return folder if __name__ == "__main__": parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Extract spectrogram for each .wav file') parser.add_argument('path', type=utils.arg_directory, help='Path of the folder/file that contains the recordings') parser.add_argument('directory', type=utils.arg_directory, help='Directory to which spectrograms will be stored') parser.add_argument('--duration', type=int, help='Duration for each spectrogram', default=8) parser.add_argument('--window', type=int, help='Window size for the Fourier Transform', default=1024) parser.add_argument('--hop', type=float, help='Ratio of hop in window : 50%% = 0.5', default=0.5) parser.add_argument('--cmap', type=str, help='Colormar of the spectrograms', choices=['jet', 'cividis', 'viridis'], default='viridis') parser.add_argument('--high', type=int, help='High Pass Filter value in Hz', default=10) parser.add_argument('--low', type=int, help='Low Pass Filter value in Hz', default=None) parser.add_argument('--overlap', type=int, help='Overlap in secondes between 2 spectrograms', default=0) parser.add_argument('--rf', type=int, help='Resampling Frequency of the signal. If no argument,' ' will be original frequency sampling of the recording', default=None) parser.add_argument('--vmin', type=str, help="If vmin == True, then the spectrogram's minimum color" ' will be stft.mean(). If False stft.min()', default=True) parser.add_argument( '--cpu', type=int, help='To speed up the process, write 2 or more', default=1) parser.add_argument( '--background', action='store_const', help='If in arguments, will save an empty .txt file', const=1, default=None) args = parser.parse_args() # Load the data and put it into a DataFrame df = utils.open_file(args.path) if args.cpu == 1: # If no multiprocessing, then loop processing for num, row in tqdm(df.iterrows(), total=len(df), desc="Processing", ascii='░▒▓█'): dest = main([num, [row.Path]], args) directory = args.directory else: args = [args]*len(df.groupby('Path')) dest = p_map(main, enumerate(df.groupby('Path')), args, num_cpus=args[0].cpu, total=len(df.groupby('Path'))) directory = args[0].directory final_dest = os.path.join(directory, dest[0] if len(dest) is list else dest) print(f'Saved to {final_dest}')