Skip to content
Snippets Groups Projects
Select Git revision
  • e9e816f5d7675389b87dfd84d0f9c4bf109fceba
  • master default protected
  • loss
  • producer
4 results

MacaonDecode.hpp

Blame
  • functions.py 4.21 KiB
    ##### IMPORTS #####
    import os
    import json
    import numpy as np
    
    from librosa import load, amplitude_to_db, stft, pcen, to_mono
    from scipy.signal import resample
    
    ##### FUNCTIONS #####
    def save_dict(dictionary, folder, name, contours=True):
        """
        Save a dictionary to a JSON file.

        Parameters
        ----------
        dictionary : dict
            Any JSON-serialisable dictionary. It is never modified in place.
        folder : str
            Path to the folder where the dictionary will be saved.
        name : str
            Name of the file in which the dictionary will be saved.
            If there is an extension, should not be different than .json extension.
        contours : bool, optional
            If True, the values are treated as contours (sequences of points):
            entries holding one point or fewer are dropped and the remaining
            entries are ordered by the smallest value found in the first
            column of their points.
            If False, the dictionary is written as given.
            Default is True.

        Returns
        -------
        None : save dict to json file.
            Nothing is written when `dictionary` is empty.
        """
        if not dictionary:
            return

        if contours:
            # Build a filtered copy instead of deleting while iterating:
            # removing keys during iteration over the dict raises
            # "RuntimeError: dictionary changed size during iteration",
            # and it would also alter the caller's object.
            kept = {
                key: points
                for key, points in dictionary.items()
                if len(points) > 1
            }

            # Sort contours by starting time (minimum of the first column).
            dictionary = dict(
                sorted(
                    kept.items(),
                    key=lambda item: np.min(np.array(item[1])[:, 0]),
                )
            )

        with open(os.path.join(folder, name), "w") as f:
            json.dump(dictionary, f, indent=4)
    
    def load_waveform(wavefile_name, sr_resample, channel="all"):
        """
        Load a wavefile and resample it to a given sampling rate.

        Parameters
        ----------
        wavefile_name : str
            Path of the wavefile that will be loaded.
        sr_resample : int
            Resampling rate for the waveform.
        channel : str, int or np.ndarray
            "all" or int(s) corresponding to the channel to import.
            The integer(s) should correspond to the index of the channel(s) to select.
            With "all", the file is loaded with librosa's default settings
            (channels mixed down to mono). With an array of indices, the
            selected channels are averaged with `to_mono`.
            Default is "all".

        Returns
        -------
        waveform_dec : numpy array
            Loaded and resampled waveform.

        Raises
        ------
        ValueError
            If `channel` is not "all", an integer or an array of integers.
        """
        # Check the type before comparing with "all": `ndarray == "all"` is
        # evaluated element-wise by NumPy and the truth value of the result is
        # ambiguous, which made array selectors fail before reaching their branch.
        if isinstance(channel, str) and channel == "all":
            waveform, sr = load(wavefile_name, sr=None)

        elif isinstance(channel, (int, np.integer)):
            waveforms, sr = load(wavefile_name, sr=None, mono=False)
            # A mono file is returned as a 1-D array; promote it to
            # (channels, samples) so that indexing selects a channel
            # rather than a single sample.
            waveforms = np.atleast_2d(waveforms)
            waveform = np.copy(waveforms[channel])

        elif isinstance(channel, np.ndarray):
            waveforms, sr = load(wavefile_name, sr=None, mono=False)
            waveforms = np.atleast_2d(waveforms)
            waveform = to_mono(np.copy(waveforms[channel]))

        else:
            raise ValueError(f"Channel '{channel}' unknown. Should be 'all', an integer or an array of integers.")

        # Number of samples needed to keep the same duration at the new rate.
        n_samples = int((len(waveform) / sr) * sr_resample)
        waveform_dec = resample(waveform, n_samples)
        return waveform_dec
        
    def wave_to_spectrogram(waveform, SR, n_fft, w_size, clip, as_pcen=False, top_db=160):
        """
        Turn a waveform into a clipped spectrogram image (dB or PCEN).

        Parameters
        ----------
        waveform : numpy array
            Waveform of an audio recording.
            NOTE(review): the duration is computed from len(waveform), so a
            1-D array of N samples appears to be expected -- confirm.
        SR : int
            Sampling rate of the waveform. Only used to compute the duration.
        n_fft : int
            Size of the fft window, forwarded to `stft` as `n_fft`.
        w_size : int
            Hop length between two fft, forwarded to `stft` as `hop_length`.
        clip : int
            Clipping value in dB, relative to the maximum of the spectrogram
            (the dB image is shifted so that its maximum is 0).
            In dB mode, pixels below `clip` are set to `clip`.
            In PCEN mode, pixels whose dB value is below `clip` are set to
            the minimum of the PCEN image.
        as_pcen : bool, optional.
            Whether the returned image should be a PCEN or not.
            Aka : spectrogram with enhanced contrast.
            Default is False.
        top_db : float, optional.
            Forwarded to `amplitude_to_db` as `top_db`.
            Default is 160.

        Returns
        -------
        spectro : numpy array
            Spectrogram of the waveform using the provided parameters.
        audio_length : float
            Duration of the audio in seconds, i.e. len(waveform) / SR.
        """
        magnitude = np.abs(stft(waveform, n_fft=n_fft, hop_length=w_size))

        # The PCEN image is only computed on request; the magnitude is scaled
        # by 2**31 before being handed to `pcen`.
        pcen_image = pcen(magnitude * (2**31), bias=10) if as_pcen else None

        # dB image, shifted so that its loudest pixel sits at 0 dB.
        db_image = amplitude_to_db(magnitude, top_db=top_db)
        db_image = db_image - np.max(db_image)

        # The clipping mask always comes from the dB image, even in PCEN mode.
        below_clip = db_image < clip

        if as_pcen:
            pcen_image[below_clip] = np.min(pcen_image)
            spectro = pcen_image
        else:
            db_image[below_clip] = clip
            spectro = db_image

        return spectro, len(waveform) / SR