From 5477ccb6aecbac32e0a4ac419fc2634f1d3c63a5 Mon Sep 17 00:00:00 2001
From: "paul.best" <paul.best@lis-lab.fr>
Date: Tue, 11 Jun 2024 15:53:37 +0200
Subject: [PATCH] update vocalisation characterisation

---
 get_SNR.py                                    | 26 ++++++++++++++++++
 ...SHR.py => vocalisation_characterisation.py | 40 +++++++++++++--------
 2 files changed, 51 insertions(+), 15 deletions(-)
 create mode 100644 get_SNR.py
 rename compute_salience_SHR.py => vocalisation_characterisation.py (56%)
 mode change 100644 => 100755

diff --git a/get_SNR.py b/get_SNR.py
new file mode 100644
index 0000000..2f0c573
--- /dev/null
+++ b/get_SNR.py
@@ -0,0 +1,26 @@
+import pandas as pd, numpy as np, os
+from p_tqdm import p_umap
+from metadata import species
+from glob import glob
+import librosa
+
+
+def fun(fn):
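+    # load the recording and its frame-wise annotations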
+    sig, fs = librosa.load(fn)
+    df = pd.read_csv(fn[:-4]+'_preds.csv')
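+    # the annotated vocalisation spans from the first to the last voiced frame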
+    start = df[df.annot > 0].time.min()
+    end = df[df.annot > 0].time.max()
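+    # signal level within the vocalisation, noise level from the rest of the recording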
+    S = np.std(sig[int(start*fs):int(end*fs)])
+    N = np.std(np.concatenate([sig[:int(start*fs)], sig[int(end*fs):]]))
+    return fn, 20 * np.log10(S/N)  # std is an amplitude, hence 20*log10 for dB
+
+os.makedirs('SNRs', exist_ok=True)  # the output folder may not exist yet
+for specie in species:
+    ret = p_umap(fun, glob(species[specie]['wavpath']), desc=specie)
+    fns, SNRs = zip(*ret)
+    df = pd.DataFrame({'fn':fns, 'SNR':SNRs})
+    print(df.SNR.describe())
+    df.to_csv(f'SNRs/{specie}.csv', index=False)
diff --git a/compute_salience_SHR.py b/vocalisation_characterisation.py
old mode 100644
new mode 100755
similarity index 56%
rename from compute_salience_SHR.py
rename to vocalisation_characterisation.py
index 6c33ca4..51f17ba
--- a/compute_salience_SHR.py
+++ b/vocalisation_characterisation.py
@@ -2,7 +2,7 @@ from glob import glob
 from p_tqdm import p_umap
 import pandas as pd, numpy as np, argparse
 from metadata import species
-import librosa
+import librosa, os
 np.seterr(divide = 'ignore')
 
 parser = argparse.ArgumentParser()
@@ -11,35 +11,45 @@ args = parser.parse_args()
 
 for specie in species if args.specie == 'all' else [args.specie]:
     wavpath, FS, nfft, downsample, step = species[specie].values()
-    Hz2bin = lambda f: np.floor(f / FS * nfft).astype(int) # f in _preds.csv are already downsamples
+    Hz2bin = lambda f: np.floor(f / FS * nfft).astype(int)
     dt = nfft *step
     hann = np.hanning(nfft)
-    get_spectrum = lambda x : 10 * np.log10(np.abs(np.fft.rfft(hann * x))**2 / (nfft/FS*1.5) +1) +1e-10 # density scaling, log1p 
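+    # magnitude spectrum of a Hann-windowed frame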
+    get_spectrum = lambda x : np.abs(np.fft.rfft(hann * x))
 
     def fun(fn):
-    # for fn in glob(wavpath):
+#    for fn in glob(wavpath):
         sig, fs = librosa.load(fn, sr=FS)
-        try:
-            df = pd.read_csv(f'{fn[:-4]}_preds.csv')
-        except:
-            return
-        df.SNR, df.SHR, df.salience = None, None, None
+        df = pd.read_csv(f'{fn[:-4]}_preds.csv')
+        for col in ('SNR', 'SHR', 'salience', 'SNR_', 'SHR_', 'salience_'):
+            df[col] = None  # df.col = None would only set an attribute, not create the column
         shr_ceil = min(fs/2, df.annot.max() * 5)
         # compute median background noise for unvoiced frames
-        spectrums = []
-        for t in np.arange(nfft//2, len(sig)-nfft//2, dt, dtype=int):
-            spectrums.append(get_spectrum(sig[t - nfft//2 : t + nfft//2]))
-        noise = np.median(np.vstack(spectrums).T, axis=1)
+        unvoiced = df.annot.isna()
+        if not unvoiced.any(): # if there aren't any unvoiced frames, we take the whole signal to estimate the noise
+            unvoiced = [True]*len(df)
+        # centre an nfft-long analysis window on each unvoiced frame, skipping frames too close to the signal edges
+        times = (df[unvoiced].time * fs).round().astype(int)
+        times = times[(times >= nfft//2) & (times <= len(sig) - nfft//2)]
+        spectrums = np.vstack([get_spectrum(sig[t - nfft//2 : t + nfft//2]) for t in times]).T
+        # per-bin median and standard deviation of the background noise
+        mednoise, stdnoise = np.median(spectrums, axis=1), np.std(spectrums, axis=1)
-        # computer saliency and SHR for each voiced frame
+        # compute harmonicity, salience and SHR for each voiced frame
         for i, r in df[df.annot>0].iterrows():
             if FS*r.time < nfft//2 or FS*r.time > len(sig) - nfft//2 or (sig[int(FS*r.time) - nfft//2 : int(FS*r.time) + nfft//2] == 0).all():
                 continue
             spec = get_spectrum(sig[int(FS*r.time) - nfft//2 : int(FS*r.time) + nfft//2])
-            spec = np.clip(spec-noise, 1e-5, 10)
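+            # noise-normalised spectrum: how far each bin sits above the median noise, in noise std units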
+            spec = np.clip((spec-mednoise)/stdnoise, 1e-12, 1e3)
             f0 = r.annot
 
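+            # harmonicity: energy at harmonics 2*f0 and above relative to all harmonics;
+            # salience: energy within a semitone of f0 relative to +/- a tritone around f0;
+            # SHR: energy at subharmonics (half-integer multiples of f0) relative to harmonics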
+            df.loc[i, 'harmonicity'] = sum(spec[Hz2bin(np.arange(f0*2, shr_ceil, f0))]) / sum(spec[Hz2bin(np.arange(f0, shr_ceil, f0))]) if f0*2 < fs/2 else None
             df.loc[i, 'salience'] = sum(spec[Hz2bin(f0*2**(-1/12)):Hz2bin(f0*2**(1/12))+1]) / sum(spec[Hz2bin(f0*2**(-6/12)):Hz2bin(f0*2**(6/12))+1])
-            df.loc[i, 'SHR'] = sum(spec[Hz2bin(np.arange(f0, shr_ceil, f0)-f0/2)]) - sum(spec[Hz2bin(np.arange(f0, shr_ceil, f0))])
+            df.loc[i, 'SHR'] = sum(spec[Hz2bin(np.arange(f0, shr_ceil, f0)-f0/2)]) / sum(spec[Hz2bin(np.arange(f0, shr_ceil, f0))]) if f0 < fs/2 else None
+
         df.to_csv(f'{fn[:-4]}_preds.csv', index=False)
 
     p_umap(fun, glob(wavpath), desc=specie)
-- 
GitLab