Submodule.cpp
    #include "Submodule.hpp"
    #include "WordEmbeddings.hpp"
    
    // When true, pretrained embeddings are reloaded even when the model is not training.
    bool Submodule::reloadPretrained = false;
    
    void Submodule::setReloadPretrained(bool value)
    {
      reloadPretrained = value;
    }
    
    void Submodule::setFirstInputIndex(std::size_t firstInputIndex)
    {
      this->firstInputIndex = firstInputIndex;
    }
    
    void Submodule::loadPretrainedW2vEmbeddings(torch::nn::Embedding embeddings, std::filesystem::path path, std::string prefix)
    {
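      // Read word2vec-format vectors from 'path', registering every token in the dict
      // under 'prefix' so its vector can be copied into 'embeddings'.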
      if (path.empty())
        return;
      // When not training, keep the previously saved weights unless a reload was requested.
      if (!is_training() and !reloadPretrained)
        return;
    
      if (!std::filesystem::exists(path))
        util::myThrow(fmt::format("pretrained word2vec file '{}' does not exist", path.string()));
    
      std::vector<std::vector<float>> toAdd;
    
      // Disable gradient tracking while pretrained vectors are copied into the embedding weights.
      torch::NoGradGuard no_grad;
    
      // Temporarily open the dict so that new words from the pretrained file can be inserted.
      auto originalState = getDict().getState();
      getDict().setState(Dict::State::Open);
    
      std::FILE * file = std::fopen(path.c_str(), "r");
      if (file == nullptr)
        util::myThrow(fmt::format("cannot open pretrained word2vec file '{}'", path.string()));
      char buffer[100000];
    
      bool firstLine = true;
      std::size_t embeddingsSize = embeddings->parameters()[0].size(-1); // expected vector dimension
    
      try
      {
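        // Text word2vec format: the first line is a header ("<vocab size> <dimension>"),
        // each following line is "<token> <v1> ... <vn>", space-separated.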
        while (!std::feof(file))
        {
          if (buffer != std::fgets(buffer, 100000, file))
            break;
    
          if (firstLine)
          {
            firstLine = false;
            continue;
          }
    
          auto splited = util::split(util::strip(buffer), ' ');
    
          if (splited.size() < 2)
            util::myThrow(fmt::format("invalid w2v line '{}': less than 2 columns", buffer));
    
          std::string word;
    
          if (splited[0] == "<unk>")
            word = Dict::unknownValueStr;
          else
            word = splited[0];
    
          // Restore spaces that the w2v file encodes as '◌' (space itself is the column delimiter).
          auto toInsert = util::splitAsUtf8(word);
          toInsert.replace("◌", " ");
          word = fmt::format("{}", toInsert);
    
          auto dictIndex = getDict().getIndexOrInsert(word, prefix);
    
          if (embeddingsSize != splited.size()-1)