Skip to content
Snippets Groups Projects
Commit 2abbd71e authored by Paul Best's avatar Paul Best
Browse files

update plotting scripts and figures

parent dd106a1d
Branches
No related tags found
No related merge requests found
import pandas as pd, numpy as np
import mir_eval.melody
from metadata import species
import matplotlib.pyplot as plt
from glob import glob
from p_tqdm import p_umap
# Thresholds used by the evaluation: cents for pitch, and the harmonicity
# value that separates harmonic from non-harmonic recordings.
cent_thr, harm_thr = 50, 0.5


def sample(x, N):
    """Return a random draw of at most N items of x as a pandas Series."""
    pool = pd.Series(x)
    return pool.sample(min(len(x), N))
# Pool at most 1000 randomly drawn prediction files per species. The glob
# pattern is the species' wavpath with its last 4 characters replaced by
# '_preds.csv'.
preds_patterns = (species[specie]['wavpath'][:-4] + '_preds.csv' for specie in species)
file_list = np.concatenate([sample(glob(pattern), 1000) for pattern in preds_patterns])

# Column prefixes of the evaluated algorithms, and the matching legend labels
# (same order).
algos = ['praat', 'pyin', 'basic', 'pesto', 'pesto_ft', 'tcrepe_ftoth', 'tcrepe_ftsp']
algo_names = ['praat', 'pyin', 'basic', 'pesto-music', 'pesto-bio', 'crepe-other', 'crepe-target']
def fun(fn):
    """Score each algorithm's pitch estimates for one predictions CSV.

    Args:
        fn: path of a CSV holding the columns ``annot`` and ``salience``,
            optionally ``harmonicity``, and ``<algo>_f0`` / ``<algo>_conf``
            for the algorithms listed in the module-level ``algos``.

    Returns:
        A DataFrame indexed by algorithm name with the columns 'Pitch acc',
        'Chroma acc', 'salience' and 'harmonicity' (0 when the file has no
        ``harmonicity`` column). An empty DataFrame is returned when the
        file's mean salience is below 0.15. Algorithms whose f0 column is
        missing or entirely NaN, or whose raw pitch accuracy is below 0.05,
        get no row.
    """
    df = pd.read_csv(fn)
    df['annot'] = mir_eval.melody.hz2cents(df.annot)
    if df.salience.mean() < 0.15:
        return pd.DataFrame()
    out = pd.DataFrame(columns=['Pitch acc', 'Chroma acc', 'salience', 'harmonicity'])
    ref_voicing = df.annot > 0
    for algo in algos:
        f0_col, conf_col = algo + '_f0', algo + '_conf'
        if f0_col not in df.columns or df[f0_col].isna().all():
            continue
        # out.loc[algo, ['Recall', 'False alarm']] = mir_eval.melody.voicing_measures(df.annot > 0, df[algo+'_conf'] > thrs[algo])
        df[f0_col] = mir_eval.melody.hz2cents(df[f0_col])
        # Assign the clipped column back: an in-place clip on `df[conf_col]`
        # is a chained operation that leaves `df` untouched under pandas
        # Copy-on-Write.
        df[conf_col] = df[conf_col].clip(0, 1)
        pitch_acc = mir_eval.melody.raw_pitch_accuracy(
            ref_voicing, df.annot, df[conf_col], df[f0_col], cent_tolerance=cent_thr)
        if pitch_acc < 0.05:
            continue
        out.loc[algo, 'Pitch acc'] = pitch_acc
        out.loc[algo, 'Chroma acc'] = mir_eval.melody.raw_chroma_accuracy(
            ref_voicing, df.annot, df[conf_col], df[f0_col], cent_tolerance=cent_thr)
    # File-level descriptors, broadcast to every scored algorithm row.
    out['salience'] = df.salience.mean()
    out['harmonicity'] = df.harmonicity.mean() if 'harmonicity' in df.columns else 0
    return out
# Score all sampled files in parallel and stack the per-file tables.
df = pd.concat(p_umap(fun, file_list))
# Bin salience to one decimal so it can serve as a grouping key.
df.salience = df.salience.round(1)
# df.harmonicity = df.harmonicity > 0.5
df.reset_index(names='algo', inplace=True)

# Mean / std of both accuracies per (algorithm, salience bin), computed
# separately on each side of the harmonicity threshold.
group_keys = ['algo', 'salience']
score_cols = ['Pitch acc', 'Chroma acc']
stats = ['mean', 'std']
harmonic = df[df.harmonicity > harm_thr].groupby(group_keys)[score_cols].agg(stats)
nonharmonic = df[df.harmonicity < harm_thr].groupby(group_keys)[score_cols].agg(stats)

fig, ax = plt.subplots(ncols=2, sharey=True, sharex=True, figsize=(10, 3.5))
left, right = ax[0], ax[1]
for algo, name in zip(algos, algo_names):
    harm_mean = harmonic.loc[algo, 'Pitch acc']['mean']
    left.plot(harmonic.loc[algo].index, harm_mean)
    # ax[0].fill_between(harmonic.loc[algo].index, harmonic.loc[algo, 'Pitch acc']['mean']+harmonic.loc[algo, 'Pitch acc']['std'], harmonic.loc[algo, 'Pitch acc']['mean']-harmonic.loc[algo, 'Pitch acc']['std'], alpha=.5)
    nonharm_mean = nonharmonic.loc[algo, 'Pitch acc']['mean']
    # Only the right-hand curves carry labels; they feed the legend below.
    right.plot(nonharmonic.loc[algo].index, nonharm_mean, label=name)
    # ax[1].fill_between(nonharmonic.loc[algo].index, nonharmonic.loc[algo, 'Pitch acc']['mean']+nonharmonic.loc[algo, 'Pitch acc']['std'], nonharmonic.loc[algo, 'Pitch acc']['mean']-nonharmonic.loc[algo, 'Pitch acc']['std'], alpha=.5)

for panel, prefix in zip(ax, ('', 'non-')):
    panel.set_xlabel('salience')
    panel.grid()
    panel.set_title(prefix + 'Harmonic vocalisations')
left.set_ylabel('mean pitch acc')
left.set_ylim(0, 1)

# Leave room on the right of the figure for the legend.
plt.tight_layout(rect=(0, 0, .87, 1))
plt.legend(bbox_to_anchor=(1,1))
plt.savefig('figures/acc_vs_salience.pdf')
......@@ -6,34 +6,51 @@ import os, mir_eval
from metadata import species
np.seterr(divide = 'ignore')
species_list = [
'wolves', 'spotted_hyenas', # 2 good salience & harmonicity
'bottlenose_dolphins', 'rodents', 'little_owls', # 3 good salience only
'monk_parakeets', 'lions', 'orangutans', 'long_billed_hermits', # 4 good harmonicity only
'hummingbirds', 'disk-winged_bats', 'Reunion_white_eyes', 'dolphins', 'la_Palma_chaffinches'] # 5 neither
taxas = ['M', 'M', 'M', 'M', 'A', 'A', 'M', 'M', 'A', 'A', 'M', 'A', 'M', 'A']
fig, ax = plt.subplots(ncols=3, sharey=True, figsize=(10, 3.5))
ax[0].set_xlabel('Frequency (Hz)')
ax[1].set_xlabel('# Voiced time bins')
ax[2].set_xlabel('Modulation rate (Hz/sec)')
ax[1].set_xlabel('Duration (sec)')
ax[2].set_xlabel('Modulation rate (octave/sec)')
for i in range(3):
ax[i].set_xscale('log')
ax[i].set_xscale('log' if i < 2 else 'symlog')
ax[i].grid()
for i, specie in enumerate(species):
mod_rate = lambda x: np.log2(x[1:] / x[:-1])
for i, (specie, tax) in enumerate(zip(species_list, taxas)):
wavpath, FS, nfft, downsample, step = species[specie].values()
dt = nfft * step / FS
fdistrib, tdistrib, moddistrib = [], [], []
for fn in tqdm(glob(wavpath), desc=specie):
files = pd.Series(glob(wavpath))
for fn in tqdm(files.sample(min(3000, len(files))), desc=specie):
annot = pd.read_csv(f'{fn[:-4]}.csv').drop_duplicates(subset='Time').fillna(0)
f0s, mask2 = mir_eval.melody.resample_melody_series(annot.Time, annot.Freq, annot.Freq > 0,\
np.arange(annot.Time.min()+1e-5, annot.Time.max(), dt), kind='linear', verbose=False)
fdistrib.extend(f0s[mask2.astype(bool)])
tdistrib.append(mask2.sum())
moddistrib.extend(abs(np.diff(f0s[mask2.astype(bool)]))/dt)
p = ax[0].violinplot(fdistrib, points=1000, positions=[i], vert=False)
p['bodies'][-1].set_alpha(1)
p = ax[1].violinplot(tdistrib, points=1000, positions=[i], vert=False)
p['bodies'][-1].set_alpha(1)
p = ax[2].violinplot(moddistrib, points=1000, positions=[i], vert=False)
p['bodies'][-1].set_alpha(1)
tdistrib.append(mask2.sum() * dt)
moddistrib.extend(mod_rate(f0s[mask2.astype(bool)])/dt)
#moddistrib.extend(abs(np.diff(f0s[mask2.astype(bool)]))/dt)
for j, data in enumerate([fdistrib, tdistrib, moddistrib]):
p = ax[j].violinplot(data, points=500, positions=[-i], vert=False, quantiles=[0.25, 0.5, 0.75])
p['cquantiles'].set_color('black')
p['bodies'][-1].set_facecolor('C0' if tax == 'M' else 'C1')
p['bodies'][-1].set_alpha(1)
p['cbars'].set_color('C0' if tax == 'M' else 'C1')
p['cmaxes'].set_color('C0' if tax == 'M' else 'C1')
p['cmins'].set_color('C0' if tax == 'M' else 'C1')
# ax[j].boxplot(data, positions=[-i], vert=False)
ax[0].set_xticks(10**np.arange(1, 6))
plt.yticks(np.arange(len(species)), [s.replace('_',' ') for s in species])
ax[2].set_xticks([-1e4, -1e2, -1, 1, 1e2, 1e4])
plt.yticks(-np.arange(len(species_list)), [s.replace('_',' ') for s in species_list])
plt.tight_layout()
plt.savefig('freq_distrib.pdf')
plt.savefig('figures/freq_distrib.pdf')
......@@ -3,9 +3,15 @@ import pandas as pd, numpy as np
from metadata import species
import argparse
algos = ['praat', 'basic', 'pesto', 'pesto_ft', 'tcrepe_ftoth', 'tcrepe_ftsp']
algos = ['praat', 'pyin', 'basic', 'pesto', 'pesto_ft', 'tcrepe_ftoth', 'tcrepe_ftsp']
metrics = ['Pitch acc', 'Chroma acc', 'Recall', 'Specificity']
species_list = [
'wolves','hyenas', 'bottlenose_dolphins', # 3 good salience & harmonicity
'rodents','little_owl','white_eye', # 3 good salience only
'lions','orangutans','parakeets','hummingbirds','long_billed','bats', # 6 good harmonicity only
'dolphins','palmae'] # 4 neither
def _str2bool(value):
    """Parse a command-line boolean such as 'True', 'false', '1' or '0'.

    `type=bool` would map every non-empty string (including 'False') to True.
    The empty string still maps to False, as it did with `bool`.

    Raises:
        argparse.ArgumentTypeError: if the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    if text in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError(f'expected a boolean value, got {value!r}')


parser = argparse.ArgumentParser()
# Each help string now describes its own flag (they were swapped).
parser.add_argument('--drop_noisy_bins', type=_str2bool, help="drop noisy STFT bins", default=False)
parser.add_argument('--drop_noisy_vocs', type=_str2bool, help="drop noisy vocalisations", default=False)
......@@ -17,7 +23,7 @@ ax[0,0].set_ylim(0, 1)
for i, metric in enumerate(metrics):
ok = pd.DataFrame()
for specie in species:
for specie in species_list:
df = pd.read_csv(f'scores/{specie}_scores{"_minusvocs" if drop_noisy_vocs else ""}{"_minusbins" if drop_noisy_bins else ""}.csv', index_col=0)
df['Specificity'] = 1 - df['False alarm']
ok.loc[specie.replace('_',' '), df.index] = df[metric]
......@@ -26,10 +32,11 @@ for i, metric in enumerate(metrics):
# bar plot
m_ax = ax[int(i//2), i%2]
ok.plot.bar(ax=m_ax, legend=None, rot=45, width=.6)
m_ax.grid()
#m_ax.grid()
m_ax.set_title(metric)
if i%2==1:
m_ax.legend(bbox_to_anchor=(1,1))
m_ax.vlines([2.5, 5.5, 11.5], 0, 1, linestyle='dashed', color='k')
plt.tight_layout()
plt.savefig(f'scores{"_minusvocs" if drop_noisy_vocs else ""}{"_minusbins" if drop_noisy_bins else ""}.pdf')
plt.savefig(f'figures/scores{"_minusvocs" if drop_noisy_vocs else ""}{"_minusbins" if drop_noisy_bins else ""}.pdf')
import matplotlib.pyplot as plt
from matplotlib import colors
from matplotlib.patches import Rectangle
import pandas as pd, numpy as np
from metadata import species
import argparse
algos = ['praat', 'tcrepe', 'tcrepe_ftsp', 'tcrepe_ftoth', 'basic', 'pesto', 'pesto_ft']
metrics = ['Pitch acc', 'Chroma acc', 'Recall', 'False alarm']
markers = [['o','d'],['s','v','H'],['<','>','^','p'],['P','*','X','D','h']]
species_list = [
['wolves', 'spotted_hyenas'], # 2 good salience & harmonicity
['bottlenose_dolphins', 'rodents', 'little_owls'], # 3 good salience only
['monk_parakeets', 'lions', 'orangutans', 'long_billed_hermits'], # 4 good harmonicity only
['hummingbirds', 'disk-winged_bats', 'Reunion_white_eyes', 'dolphins', 'la_Palma_chaffinches']] # 5 neither
algos = ['praat', 'pyin', 'basic', 'pesto', 'pesto_ft', 'tcrepe_ftoth', 'tcrepe_ftsp']
algo_names = ['praat', 'pyin', 'basic', 'pesto-music', 'pesto-bio', 'crepe-other', 'crepe-target']
#metrics = ['Pitch acc', 'Chroma acc']
metrics = ['Recall', 'Specificity', 'Vocalisation recall']
def _str2bool(value):
    """Parse a command-line boolean such as 'True', 'false', '1' or '0'.

    `type=bool` would map every non-empty string (including 'False') to True.
    The empty string still maps to False, as it did with `bool`.

    Raises:
        argparse.ArgumentTypeError: if the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    if text in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError(f'expected a boolean value, got {value!r}')


parser = argparse.ArgumentParser()
# The help text now matches the flag name (it previously described vocalisations).
parser.add_argument('--drop_noisy_bins', type=_str2bool, help="drop noisy STFT bins", default=False)
......@@ -12,26 +25,38 @@ parser.add_argument('--drop_noisy_vocs', type=bool, help="drop noisy STFT bins",
args = parser.parse_args()
drop_noisy_vocs, drop_noisy_bins = args.drop_noisy_vocs, args.drop_noisy_bins
fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(10, 5), sharex=True, sharey=True)
ax[0,0].set_ylim(0, 1)
fig, ax = plt.subplots(nrows=1, ncols=len(metrics), figsize=(10, 3.5), sharex=True, sharey=True)
#ax[0].set_yticks([0, .2, .4, .6, .8, 1])
ax[0].set_ylim(0, 1)
algo_legend = []
for specie in species:
df = pd.read_csv(f'scores/{specie}_scores{"_minusvocs" if drop_noisy_vocs else ""}{"_minusbins" if drop_noisy_bins else ""}.csv', index_col=0)
df['False alarm'] = 1 - df['False alarm']
for algo in algos:
if not algo in df.index:
df.loc[algo, metrics] = [None]*len(metrics)
for i, metric in enumerate(metrics):
ax[int(i//2), i%2].scatter(np.arange(len(algos)), df.loc[algos, metric], label=specie)
ok = pd.DataFrame()
for specie in np.concatenate(species_list):
df = pd.read_csv(f'scores/{specie}_scores{"_minusvocs" if drop_noisy_vocs else ""}{"_minusbins" if drop_noisy_bins else ""}.csv', index_col=0)
df['Specificity'] = 1 - df['False alarm']
df.rename(columns={'Voc. recall':'Vocalisation recall'}, inplace=True)
ok.loc[specie, df.index] = df[metric]
for j, algo in enumerate(algos):
for k, (species_grp, markers_grp) in enumerate(zip(species_list, markers)):
x = k-.3+j*.6/len(algos)
y = ok.loc[species_grp, algo].min()
h = ok.loc[species_grp, algo].max()-ok.loc[species_grp, algo].min()
rect = Rectangle((x, y), .6/len(algos), h, facecolor=list(colors.TABLEAU_COLORS)[j], alpha=.5)
if k==0:
algo_legend.append(rect)
ax[i].add_patch(rect)
for l, (specie, marker) in enumerate(zip(species_grp, markers_grp)):
if algo =='pesto' and specie=='LO':
print(ok.loc[specie, algo])
ax[i].scatter(k - .3 + (j+.5) * .6/len(algos), ok.loc[specie, algo], marker=marker, s=10, color='grey', label=specie.replace('_',' ') if j==0 else None)
for i, metric in enumerate(metrics):
m_ax = ax[int(i//2), i%2]
m_ax.grid()
m_ax.set_title(metric)
plt.xticks(np.arange(len(algos)), algos, rotation=22)
ax[1,0].set_xticklabels(algos, rotation=22)
plt.tight_layout(rect=(0, 0, .87, 1))
ax[0, 1].legend(bbox_to_anchor=(1,1))
plt.savefig(f'scatter_scores{"_minusvocs" if drop_noisy_vocs else ""}{"_minusbins" if drop_noisy_bins else ""}.pdf')
m_ax = ax[i] #ax[int(i//2), i%2]
m_ax.grid('both', axis='y')
m_ax.set_title(metric, fontsize='medium')
plt.xticks(np.arange(len(species_list)), ['S-H','S-nH.','nS-H.','nS-nH.'])
#ax[1].set_xticklabels(algos, rotation=22)
plt.tight_layout(rect=(0, 0, .87, .9))
ax[0].legend(loc='lower left', ncols=7, bbox_to_anchor=(0, 1.1), fontsize='x-small')
ax[-1].legend(algo_legend, algo_names, bbox_to_anchor=(1,1))
plt.savefig(f'figures/scatter_detec_scores{"_minusvocs" if drop_noisy_vocs else ""}{"_minusbins" if drop_noisy_bins else ""}.pdf')
import pandas as pd, numpy as np
import matplotlib.pyplot as plt
from scipy import signal
from p_tqdm import p_umap
from glob import glob
import mir_eval, librosa
from metadata import species
np.seterr(divide = 'ignore')
fig, ax = plt.subplots(ncols=2, sharey=True, figsize=(10, 3.5))
SNRs, SHRs = [], []
for i, specie in enumerate(species):
def fun(fn):
    """Return the 25 % quantiles of salience and SHR for one recording.

    The predictions are read from the path obtained by replacing the last
    four characters of `fn` with '_preds.csv'. When that table has no
    `salience` column, `fn` is printed and `(0, 0)` is returned instead.
    """
    preds = pd.read_csv(f'{fn[:-4]}_preds.csv')
    if 'salience' in preds.columns:
        return preds.salience.quantile(.25), preds.SHR.quantile(.25)
    print(fn)
    return 0, 0
ret = p_umap(fun, glob(species[specie]['wavpath']), desc=specie, num_cpus=40)
ax[0].violinplot(list(zip(*ret))[0], positions=[i], vert=False)
ax[1].violinplot(list(zip(*ret))[1], positions=[i], vert=False)
ax[0].set_title('f0 saliency')
ax[0].grid()
ax[1].set_title('SHR (dB)')
ax[1].set_xscale('symlog', linthresh=10)
ax[1].set_xticklabels([-100, -10, 0])
ax[1].grid()
plt.yticks(np.arange(len(species)), [s.replace('_',' ') for s in species])
ax[0].vlines(.2, -.5, len(species)-.5, linestyle='dashed', colors='k')
ax[1].vlines(10*np.log10(0.2), -.5, len(species)-.5, linestyle='dashed', colors='k')
species_list = [
'wolves', 'spotted_hyenas', # 2 good salience & harmonicity
'bottlenose_dolphins', 'rodents', 'little_owls', # 3 good salience only
'monk_parakeets', 'lions', 'orangutans', 'long_billed_hermits', # 4 good harmonicity only
'hummingbirds', 'disk-winged_bats', 'Reunion_white_eyes', 'dolphins', 'la_Palma_chaffinches'] # 5 neither
taxas = ['M', 'M', 'M', 'M', 'A', 'A', 'M', 'M', 'A', 'A', 'M', 'A', 'M', 'A']
fig, ax = plt.subplots(ncols=3, sharey=True, figsize=(10, 3.5))
plt.yticks(-np.arange(len(species_list)), [s.replace('_',' ') for s in species_list])
for i, t in enumerate(['Salience', 'SHR', 'Harmonicity']):
ax[i].set_xlabel(t)
ax[i].set_xlim(0, 1)
ax[i].set_axisbelow(True)
ax[i].grid()
ax[0].hlines([-1.5, -4.5, -8.5], 0, 1, linestyle='dashed', color='grey')
ax[1].hlines([-1.5, -4.5, -8.5], 0, 100, linestyle='dashed', color='grey')
ax[2].hlines([-1.5, -4.5, -8.5], 0, 1, linestyle='dashed', color='grey')
for i, (specie, taxa) in enumerate(zip(species_list, taxas)):
fun = lambda fn: pd.read_csv(fn)[['salience', 'SHR', 'harmonicity']].dropna().to_numpy().T
ret = p_umap(fun, glob(species[specie]['wavpath'][:-4]+'_preds.csv'), desc=specie)
salience, SHR, harmonicity = (np.clip(np.concatenate(r), 0, 1) for r in zip(*ret))
SHR = SHR[salience > .2]
harmonicity = harmonicity[salience > .2]
for j, data in enumerate([salience, SHR, harmonicity]):
p = ax[j].violinplot(data, points=500, positions=[-i], vert=False, quantiles=[0.25, 0.5, 0.75], showextrema=False)
p['bodies'][-1].set_facecolor('C0' if taxa == 'M' else 'C1')
p['bodies'][-1].set_alpha(1)
p['cquantiles'].set_color('black')
plt.tight_layout()
plt.savefig('SNR_distrib.pdf')
plt.savefig('figures/SNR_distrib.pdf')
plt.close()
......@@ -22,7 +22,9 @@ for specie in species if args.specie is None else [args.specie]:
os.makedirs(f'annot_pngs/{fn[5:].rsplit("/",1)[0]}', exist_ok=True)
# load signal and compute spectrogram
sig, fs = librosa.load(fn, sr=FS)
df = pd.read_csv(f'{fn[:-4]}_preds.csv').dropna(subset='annot')
# df = pd.read_csv(f'{fn[:-4]}_preds.csv').dropna(subset='annot')
df = pd.read_csv(f'{fn[:-4]}.csv').rename(columns={'Time':'time', 'Freq':'annot'})
plt.specgram(sig, Fs=FS, NFFT=nfft, noverlap=int(nfft-dt*fs))
# plot
......
import pandas as pd, numpy as np
import matplotlib.pyplot as plt
import librosa, mir_eval
from scipy import signal
from glob import glob
from metadata import species
# Algorithms whose thresholded predictions are overlaid on every spectrogram.
algos = ['praat', 'pesto_ft', 'tcrepe_ftsp']
species_list = [
    'wolves', 'spotted_hyenas',  # 2 good salience & harmonicity
    'bottlenose_dolphins', 'rodents', 'little_owls',  # 3 good salience only
    'hummingbirds', 'disk-winged_bats', 'Reunion_white_eyes', 'monk_parakeets', 'lions', 'orangutans', 'long_billed_hermits',  # 7 good harmonicity only
    'dolphins', 'la_Palma_chaffinches']  # 2 neither

# One row of panels per species, one column per randomly drawn recording.
fig, ax = plt.subplots(nrows=len(species_list), ncols=5, figsize=(12, 15), sharey='row')
for i, specie in enumerate(species_list):
    wavpath, FS, nfft, downsample, step = species[specie].values()
    # Per-algorithm confidence thresholds, read from the species' score table.
    thrs = pd.read_csv(f'scores/{specie}_scores.csv', index_col=0).threshold
    # Cap the draw at the number of files found: Series.sample raises
    # ValueError when asked for more items than exist (without replacement).
    # With fewer than 5 files the remaining panels of the row stay empty.
    candidates = pd.Series(glob(wavpath))
    files = candidates.sample(min(5, len(candidates)))
    dt = nfft * step / FS  # winsize / 8
    for j, fn in enumerate(files):
        # load signal and compute spectrogram
        sig, fs = librosa.load(fn, sr=FS)
        df = pd.read_csv(f'{fn[:-4]}_preds.csv')
        # Keep only rows with a positive annotation; predictions are
        # therefore only drawn where an annotation exists.
        df = df[df.annot > 0]
        freqs, times, S = signal.spectrogram(sig, fs=FS, nperseg=nfft, noverlap=int(nfft - dt * fs))
        S = 10 * np.log10(S + 1e-10)
        # Subtract each frequency bin's median over time.
        S -= np.median(S, axis=1, keepdims=True)
        # NOTE(review): plt.autoscale acts on pyplot's current axes, which is
        # not necessarily ax[i, j] - confirm this is the intended target.
        plt.autoscale(False)
        ax[i, j].imshow(S, vmin=np.quantile(S, .2), vmax=np.quantile(S, .98), origin='lower', aspect='auto', extent=[0, len(sig)/fs, 0, fs/2/1000], cmap='Greys')
        plt.autoscale(True)
        # Frequencies are divided by 1000 to match the image's y extent.
        annotated = df.dropna(subset='annot')
        ax[i, j].scatter(annotated.time, annotated.annot / 1000, label='annot')
        for algo in algos:
            # if not df[algo+'_f0'].isna().all():
            confident = df[df[algo + '_conf'] > thrs[algo]]
            ax[i, j].scatter(confident.time, confident[algo + '_f0'] / 1000, label=algo, s=3)
        ax[i, j].set_ylim(0, df.annot.max() * 2 / 1000)
        # Ticks every 0.1 along x, with the labels suppressed.
        ax[i, j].set_xticks(np.arange(0, len(sig) / fs, 0.1), "")
        if j == 0:
            ax[i, j].set_ylabel(specie.replace('_', ' ').replace(' ', '\n', 1))
# plt.legend()
plt.tight_layout(pad=0.1, w_pad=0.1, h_pad=-0.05)
plt.savefig('figures/sample_spectrograms.pdf')
......@@ -7,7 +7,7 @@ import os, argparse
from metadata import species
np.seterr(divide = 'ignore')
algos = ['praat', 'tcrepe_ft']
algos = ['pesto_ft']
parser = argparse.ArgumentParser()
parser.add_argument('specie', type=str, help="Species to treat specifically", default=None)
args = parser.parse_args()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment