Commit 5477ccb6 authored by Paul Best

update vocalisation characterisation

parent 245880da
import pandas as pd, numpy as np
from p_tqdm import p_umap
from metadata import species
from glob import glob
import librosa

def fun(fn):
    sig, fs = librosa.load(fn)  # librosa resamples to 22.05 kHz by default
    df = pd.read_csv(fn[:-4] + '_preds.csv')
    # the vocalisation spans the first to the last annotated frame
    start = df[df.annot > 0].time.min()
    end = df[df.annot > 0].time.max()
    # signal level: std of the samples inside the vocalisation
    S = np.std(sig[int(start*fs):int(end*fs)])
    # noise level: std of the samples before and after it
    N = np.std(np.concatenate([sig[:int(start*fs)], sig[int(end*fs):]]))
    return fn, 10 * np.log10(S / N)  # ratio of standard deviations, in dB

for specie in species:
    ret = p_umap(fun, glob(species[specie]['wavpath']), desc=specie)
    fns, SNRs = zip(*ret)
    df = pd.DataFrame({'fn': fns, 'SNR': SNRs})
    print(df.SNR.describe())
    df.to_csv(f'SNRs/{specie}.csv')
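
The script above thus gives one SNR value per recording: the standard deviation of the samples inside the annotated vocalisation against that of the samples outside it, expressed in dB. A minimal sanity check of the same formula on a synthetic recording (all values below are illustrative, not taken from the repository):

import numpy as np
fs = 22050                                          # illustrative rate, matching librosa's default
sig = 0.01 * np.random.randn(3 * fs)                # 3 s of synthetic background noise
t = np.arange(fs) / fs
sig[fs:2*fs] += 0.1 * np.sin(2 * np.pi * 440 * t)   # a 1 s "vocalisation" at 440 Hz
S = np.std(sig[fs:2*fs])                            # level inside the annotated span
N = np.std(np.concatenate([sig[:fs], sig[2*fs:]]))  # level outside it
print(10 * np.log10(S / N))                         # ~8.5 dB for these amplitudes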
@@ -2,7 +2,7 @@ from glob import glob
 from p_tqdm import p_umap
 import pandas as pd, numpy as np, argparse
 from metadata import species
-import librosa
+import librosa, os
 np.seterr(divide = 'ignore')
 parser = argparse.ArgumentParser()
@@ -11,35 +11,35 @@ args = parser.parse_args()
 for specie in species if args.specie == 'all' else [args.specie]:
     wavpath, FS, nfft, downsample, step = species[specie].values()
-    Hz2bin = lambda f: np.floor(f / FS * nfft).astype(int) # f in _preds.csv are already downsampled
+    Hz2bin = lambda f: np.floor(f / FS * nfft).astype(int)
     dt = nfft * step
     hann = np.hanning(nfft)
-    get_spectrum = lambda x: 10 * np.log10(np.abs(np.fft.rfft(hann * x))**2 / (nfft/FS*1.5) + 1) + 1e-10 # density scaling, log1p
+    get_spectrum = lambda x: np.abs(np.fft.rfft(hann * x))
     def fun(fn):
         # for fn in glob(wavpath):
         sig, fs = librosa.load(fn, sr=FS)
         try:
             df = pd.read_csv(f'{fn[:-4]}_preds.csv')
         except:
             return
-        df.SNR, df.SHR, df.salience = None, None, None
+        df.SNR, df.SHR, df.salience, df.SNR_, df.SHR_, df.salience_ = None, None, None, None, None, None
         shr_ceil = min(fs/2, df.annot.max() * 5)
         # compute median background noise for unvoiced frames
-        spectrums = []
-        for t in np.arange(nfft//2, len(sig)-nfft//2, dt, dtype=int):
-            spectrums.append(get_spectrum(sig[t - nfft//2 : t + nfft//2]))
-        noise = np.median(np.vstack(spectrums).T, axis=1)
+        unvoiced = df.annot.isna()
+        if not unvoiced.any(): # if there aren't any unvoiced frames, we take the whole signal to estimate the noise
+            unvoiced = [True]*len(df)
+        spectrums = np.vstack([get_spectrum(sig[t - nfft//2 : t + nfft//2]) for t in (df[unvoiced].time * fs).round().astype(int)]).T
+        mednoise, stdnoise = np.median(spectrums, axis=1), np.std(spectrums, axis=1)
         # compute salience and SHR for each voiced frame
         for i, r in df[df.annot>0].iterrows():
             if FS*r.time < nfft//2 or FS*r.time > len(sig) - nfft//2 or (sig[int(FS*r.time) - nfft//2 : int(FS*r.time) + nfft//2] == 0).all():
                 continue
             spec = get_spectrum(sig[int(FS*r.time) - nfft//2 : int(FS*r.time) + nfft//2])
-            spec = np.clip(spec - noise, 1e-5, 10)
+            spec = np.clip((spec - mednoise) / stdnoise, 1e-12, 1e3)
             f0 = r.annot
             df.loc[i, 'harmonicity'] = sum(spec[Hz2bin(np.arange(f0*2, shr_ceil, f0))]) / sum(spec[Hz2bin(np.arange(f0, shr_ceil, f0))]) if f0*2 < fs/2 else None
             df.loc[i, 'salience'] = sum(spec[Hz2bin(f0*2**(-1/12)):Hz2bin(f0*2**(1/12))+1]) / sum(spec[Hz2bin(f0*2**(-6/12)):Hz2bin(f0*2**(6/12))+1])
-            df.loc[i, 'SHR'] = sum(spec[Hz2bin(np.arange(f0, shr_ceil, f0)-f0/2)]) - sum(spec[Hz2bin(np.arange(f0, shr_ceil, f0))])
+            df.loc[i, 'SHR'] = sum(spec[Hz2bin(np.arange(f0, shr_ceil, f0)-f0/2)]) / sum(spec[Hz2bin(np.arange(f0, shr_ceil, f0))]) if f0 < fs/2 else None
         df.to_csv(f'{fn[:-4]}_preds.csv', index=False)
     p_umap(fun, glob(wavpath), desc=specie)
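
To make the new measures concrete, here is a minimal sketch of the salience and ratio-based SHR from fun() applied to one synthetic frame; FS, nfft and f0 are illustrative values, and the median-noise z-scoring of the spectrum is omitted for brevity:

import numpy as np

FS, nfft, f0 = 44100, 1024, 440          # illustrative, not the values in metadata.species
t = np.arange(nfft) / FS
# one frame of a harmonic "vocalisation": f0 plus two harmonics and light noise
x = sum(a * np.sin(2 * np.pi * k * f0 * t) for k, a in [(1, 1.0), (2, .5), (3, .25)])
x += 0.01 * np.random.randn(nfft)

hann = np.hanning(nfft)
Hz2bin = lambda f: np.floor(f / FS * nfft).astype(int)
spec = np.abs(np.fft.rfft(hann * x))     # linear magnitude, as in the new get_spectrum

shr_ceil = min(FS / 2, f0 * 5)
harmonics = Hz2bin(np.arange(f0, shr_ceil, f0))              # bins of f0, 2*f0, ...
subharmonics = Hz2bin(np.arange(f0, shr_ceil, f0) - f0 / 2)  # bins halfway between them
# salience: energy within +-1 semitone of f0, relative to +-6 semitones around it
salience = spec[Hz2bin(f0 * 2**(-1/12)):Hz2bin(f0 * 2**(1/12)) + 1].sum() \
         / spec[Hz2bin(f0 * 2**(-6/12)):Hz2bin(f0 * 2**(6/12)) + 1].sum()
# SHR as a ratio, the form introduced by this commit (previously a difference)
SHR = spec[subharmonics].sum() / spec[harmonics].sum()
print(f'salience={salience:.2f}, SHR={SHR:.3f}')  # expect high salience, low SHR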