Skip to content
Snippets Groups Projects
Commit 5477ccb6 authored by Paul Best's avatar Paul Best
Browse files

update vocalisation characterisation

parent 245880da
No related branches found
No related tags found
No related merge requests found
import pandas as pd, numpy as np
from p_tqdm import p_umap
from metadata import species
from glob import glob
import librosa
def fun(fn):
sig, fs = librosa.load(fn)
df = pd.read_csv(fn[:-4]+'_preds.csv')
start = df[df.annot > 0].time.min()
end = df[df.annot > 0].time.max()
S = np.std(sig[int(start*fs):int(end*fs)])
N = np.std(np.concatenate([sig[:int(start*fs)], sig[int(end*fs):]]))
return fn, 10 * np.log10(S/N)
for specie in species:
ret = p_umap(fun, glob(species[specie]['wavpath']), desc=specie)
fns, SNRs = zip(*ret)
df = pd.DataFrame({'fn':fns, 'SNR':SNRs})
print(df.SNR.describe())
df.to_csv(f'SNRs/{specie}.csv')
...@@ -2,7 +2,7 @@ from glob import glob ...@@ -2,7 +2,7 @@ from glob import glob
from p_tqdm import p_umap from p_tqdm import p_umap
import pandas as pd, numpy as np, argparse import pandas as pd, numpy as np, argparse
from metadata import species from metadata import species
import librosa import librosa, os
np.seterr(divide = 'ignore') np.seterr(divide = 'ignore')
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
...@@ -11,35 +11,35 @@ args = parser.parse_args() ...@@ -11,35 +11,35 @@ args = parser.parse_args()
for specie in species if args.specie == 'all' else [args.specie]: for specie in species if args.specie == 'all' else [args.specie]:
wavpath, FS, nfft, downsample, step = species[specie].values() wavpath, FS, nfft, downsample, step = species[specie].values()
Hz2bin = lambda f: np.floor(f / FS * nfft).astype(int) # f in _preds.csv are already downsamples Hz2bin = lambda f: np.floor(f / FS * nfft).astype(int)
dt = nfft *step dt = nfft *step
hann = np.hanning(nfft) hann = np.hanning(nfft)
get_spectrum = lambda x : 10 * np.log10(np.abs(np.fft.rfft(hann * x))**2 / (nfft/FS*1.5) +1) +1e-10 # density scaling, log1p get_spectrum = lambda x : np.abs(np.fft.rfft(hann * x))
def fun(fn): def fun(fn):
# for fn in glob(wavpath): # for fn in glob(wavpath):
sig, fs = librosa.load(fn, sr=FS) sig, fs = librosa.load(fn, sr=FS)
try:
df = pd.read_csv(f'{fn[:-4]}_preds.csv') df = pd.read_csv(f'{fn[:-4]}_preds.csv')
except: df.SNR, df.SHR, df.salience, df.SNR_, df.SHR_, df.salience_ = None, None, None, None, None, None
return
df.SNR, df.SHR, df.salience = None, None, None
shr_ceil = min(fs/2, df.annot.max() * 5) shr_ceil = min(fs/2, df.annot.max() * 5)
# compute median background noise for unvoiced frames # compute median background noise for unvoiced frames
spectrums = [] unvoiced = df.annot.isna()
for t in np.arange(nfft//2, len(sig)-nfft//2, dt, dtype=int): if not unvoiced.any(): # if there aren't any unvoiced frames, we take the whole signal to estimate the noise
spectrums.append(get_spectrum(sig[t - nfft//2 : t + nfft//2])) unvoiced = [True]*len(df)
noise = np.median(np.vstack(spectrums).T, axis=1) spectrums = np.vstack([get_spectrum(sig[t - nfft//2 : t + nfft//2]) for t in (df[unvoiced].time * fs).round().astype(int)]).T
mednoise, stdnoise = np.median(spectrums, axis=1), np.std(spectrums, axis=1)
# computer saliency and SHR for each voiced frame # computer saliency and SHR for each voiced frame
for i, r in df[df.annot>0].iterrows(): for i, r in df[df.annot>0].iterrows():
if FS*r.time < nfft//2 or FS*r.time > len(sig) - nfft//2 or (sig[int(FS*r.time) - nfft//2 : int(FS*r.time) + nfft//2] == 0).all(): if FS*r.time < nfft//2 or FS*r.time > len(sig) - nfft//2 or (sig[int(FS*r.time) - nfft//2 : int(FS*r.time) + nfft//2] == 0).all():
continue continue
spec = get_spectrum(sig[int(FS*r.time) - nfft//2 : int(FS*r.time) + nfft//2]) spec = get_spectrum(sig[int(FS*r.time) - nfft//2 : int(FS*r.time) + nfft//2])
spec = np.clip(spec-noise, 1e-5, 10) spec = np.clip((spec-mednoise)/stdnoise, 1e-12, 1e3)
f0 = r.annot f0 = r.annot
df.loc[i, 'harmonicity'] = sum(spec[Hz2bin(np.arange(f0*2, shr_ceil, f0))]) / sum(spec[Hz2bin(np.arange(f0, shr_ceil, f0))]) if f0 *2 < fs / 2 else None
df.loc[i, 'salience'] = sum(spec[Hz2bin(f0*2**(-1/12)):Hz2bin(f0*2**(1/12))+1]) / sum(spec[Hz2bin(f0*2**(-6/12)):Hz2bin(f0*2**(6/12))+1]) df.loc[i, 'salience'] = sum(spec[Hz2bin(f0*2**(-1/12)):Hz2bin(f0*2**(1/12))+1]) / sum(spec[Hz2bin(f0*2**(-6/12)):Hz2bin(f0*2**(6/12))+1])
df.loc[i, 'SHR'] = sum(spec[Hz2bin(np.arange(f0, shr_ceil, f0)-f0/2)]) - sum(spec[Hz2bin(np.arange(f0, shr_ceil, f0))]) df.loc[i, 'SHR'] = sum(spec[Hz2bin(np.arange(f0, shr_ceil, f0)-f0/2)]) / sum(spec[Hz2bin(np.arange(f0, shr_ceil, f0))]) if f0 < fs/2 else None
df.to_csv(f'{fn[:-4]}_preds.csv', index=False) df.to_csv(f'{fn[:-4]}_preds.csv', index=False)
p_umap(fun, glob(wavpath), desc=specie) p_umap(fun, glob(wavpath), desc=specie)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment