Skip to content
Snippets Groups Projects
Commit f28f1226 authored by Paul Best
Browse files

move data to a specific folder

parent f278a8a0
Branches
No related tags found
No related merge requests found
LiuLabData/*
__pycache__/* __pycache__/*
train_set*
hyenas/*
dclmmpa2011/*
wolves/*
orangs/*
Synth_birds_database/*
pred_pngs/* pred_pngs/*
annot_pngs/* annot_pngs/*
noisy_pngs/* noisy_pngs/*
runs/* runs/*
little_auk/* data/*
Lion Roar Data/*
monk parakeet/*
FCPalmae/*
little_owl/*
white_eye/*
pesto-full/* pesto-full/*
marcelo/* crepe_ft/train_set*.pkl
acoustic_data_sets.zip
species = { species = {
'wolves':{ 'wolves':{
'wavpath': 'wolves/*/*.wav', 'wavpath': 'data/wolves/*/*.wav',
'FS': 16000, 'FS': 16000,
'nfft': 1024, 'nfft': 1024,
'downsample':1, 'downsample':1,
'step': 1/8 'step': 1/8
}, },
# 'SynthBirds':{
# 'wavpath': 'Synth_birds_database/1_original_files/*/*.wav',
# 'FS': 44100,
# 'nfft': 1024,
# 'downsample':1,
# 'step': 1/16
# },
'dclde':{ 'dclde':{
'wavpath': 'dclmmpa2011/cut_no_overlap/*.wav', 'wavpath': 'data/dclmmpa2011/cut_no_overlap/*.wav',
'FS': 192000, 'FS': 192000,
'nfft':1536, # according to silbido paper, 8ms windows and 125Hz resolution 'nfft':1536, # according to silbido paper, 8ms windows and 125Hz resolution
'downsample':20, 'downsample':20,
'step': 1/8 'step': 1/8
}, },
'hyenas':{ 'hyenas':{
'wavpath': 'hyenas/lehmann hyena whoop traces/*.wav', 'wavpath': 'data/hyenas/lehmann hyena whoop traces/*.wav',
'FS': 8000, 'FS': 8000,
'nfft': 2048, 'nfft': 2048,
'downsample':1, 'downsample':1,
'step': 1/8 'step': 1/8
}, },
'orangs':{ 'orangs':{
'wavpath': 'orangs/*.wav', 'wavpath': 'data/orangs/*.wav',
'FS': 44100, 'FS': 44100,
'nfft':2048, 'nfft':2048,
'downsample':1, 'downsample':1,
'step': 1/8 'step': 1/8
}, },
'mice':{ 'mice':{
'wavpath': 'LiuLabData/cut_no_overlap/*.wav', 'wavpath': 'data/LiuLabData/cut_no_overlap/*.wav',
'FS': 250000, 'FS': 250000,
'nfft':512, 'nfft':512,
'downsample':50, 'downsample':50,
'step': 1/8 'step': 1/8
}, },
'lions':{ 'lions':{
'wavpath': 'Lion Roar Data/Biologger Roars Expanded (500ms)/*.WAV', 'wavpath': 'data/Lion Roar Data/Biologger Roars Expanded (500ms)/*.WAV',
'FS':16000, 'FS':16000,
'nfft':2048, # was zero-padded to 4 times 2048 in the original 'nfft':2048, # was zero-padded to 4 times 2048 in the original
'downsample':0.5, 'downsample':0.5,
'step': 1/8 'step': 1/8
}, },
'parakeets':{ 'parakeets':{
'wavpath': 'monk parakeet/pre-processed_calls/*.WAV', 'wavpath': 'data/monk parakeet/pre-processed_calls/*.WAV',
'FS':44100, 'FS':44100,
'nfft':512, 'nfft':512,
'downsample':3, 'downsample':3,
'step': 1/16 'step': 1/16
}, },
'palmae':{ 'palmae':{
'wavpath': 'FCPalmae/cut/*.wav', 'wavpath': 'data/FCPalmae/cut/*.wav',
'FS':44100, 'FS':44100,
'nfft':1024, 'nfft':1024,
'downsample':5, 'downsample':5,
'step': 1/16 'step': 1/16
}, },
'little_owl':{ 'little_owl':{
'wavpath': 'little_owl/cut/*.wav', 'wavpath': 'data/little_owl/cut/*.wav',
'FS':4000, 'FS':4000,
'nfft':512, 'nfft':512,
'downsample':1, 'downsample':1,
'step': 1/8 'step': 1/8
}, },
'white_eye':{ 'white_eye':{
'wavpath': 'white_eye/cut/*.wav', 'wavpath': 'data/white_eye/cut/*.wav',
'FS':44100, 'FS':44100,
'nfft':1024, 'nfft':1024,
'downsample':5, 'downsample':5,
'step': 1/16 'step': 1/16
}, },
'long_billed':{ 'long_billed':{
'wavpath':'marcelo/long_billed_hermit_songs/*.wav', 'wavpath':'data/marcelo/long_billed_hermit_songs/*.wav',
'FS':44100, 'FS':44100,
'nfft':512, 'nfft':512,
'downsample':5, 'downsample':5,
'step': 1/16 'step': 1/16
},
'hummingbird':{
'wavpath':'data/marcelo/hummingbird_songs/*.wav',
'FS':44100,
'nfft':512,
'downsample':5,
'step':1/16
},
'bats':{
'wavpath':'data/marcelo/spixs_disc_winged_bat_*/*.wav',
'FS':400000,
'nfft':512,
'downsample':20,
'step':1/16
} }
} }
...@@ -14,13 +14,12 @@ args = parser.parse_args() ...@@ -14,13 +14,12 @@ args = parser.parse_args()
for specie in species if args.specie is None else [args.specie]: for specie in species if args.specie is None else [args.specie]:
wavpath, FS, nfft, downsample, step = species[specie].values() wavpath, FS, nfft, downsample, step = species[specie].values()
dt = nfft * step / FS # winsize / 8 dt = nfft * step / FS # winsize / 8
Hz2bin = lambda f: np.round(f/FS*nfft).astype(int)
# for fn in glob(wavpath): # for fn in glob(wavpath):
def fun(fn): def fun(fn):
# if os.path.isfile(f'annot_pngs/{fn[:-4]}.png'): # if os.path.isfile(f'annot_pngs/{fn[:-4]}.png'):
# return # return
if not os.path.isdir(f'annot_pngs/{fn.rsplit("/",1)[0]}'): if not os.path.isdir(f'annot_pngs/{fn[5:].rsplit("/",1)[0]}'):
os.mkdir(f'annot_pngs/{fn.rsplit("/",1)[0]}') os.makedirs(f'annot_pngs/{fn[5:].rsplit("/",1)[0]}', exist_ok=True)
# load signal and compute spectrogram # load signal and compute spectrogram
sig, fs = librosa.load(fn, sr=FS) sig, fs = librosa.load(fn, sr=FS)
df = pd.read_csv(f'{fn[:-4]}_preds.csv').dropna(subset='annot') df = pd.read_csv(f'{fn[:-4]}_preds.csv').dropna(subset='annot')
...@@ -30,12 +29,13 @@ for specie in species if args.specie is None else [args.specie]: ...@@ -30,12 +29,13 @@ for specie in species if args.specie is None else [args.specie]:
if 'salience' in df.columns: if 'salience' in df.columns:
plt.scatter(df.time, df.annot, c=df.salience, s=1 if specie in {'dclde','mice'} else None, cmap='jet', vmin=0, vmax=1) plt.scatter(df.time, df.annot, c=df.salience, s=1 if specie in {'dclde','mice'} else None, cmap='jet', vmin=0, vmax=1)
plt.colorbar() plt.colorbar()
plt.title(f'Salience {df.salience.quantile(.25):.2f}, SHR {df.SHR.quantile(.25):.2f}') plt.title(f'Salience {df.salience.quantile(.25):.2f}, SHR {df.SHR.quantile(.25):.2f}, Harmonicity {df.harmonicity.quantile(.25):.2f}')
else: else:
plt.scatter(df.time, df.annot, alpha=.2, s=1 if specie in {'dclde','mice'} else None) plt.scatter(df.time, df.annot, alpha=.2, s=1 if specie in {'dclde','mice'} else None)
plt.ylim(0, df.annot.max()*1.5) plt.ylim(0, df.annot.max()*1.5)
plt.tight_layout() plt.tight_layout()
plt.savefig(f'annot_pngs/{fn[:-4]}.png') plt.savefig(f'annot_pngs/{fn[5:-4]}.png')
plt.close() plt.close()
p_umap(fun, glob(wavpath), desc=specie) files = pd.Series(glob(wavpath))
p_umap(fun, files, desc=specie)
...@@ -7,15 +7,15 @@ import os, argparse ...@@ -7,15 +7,15 @@ import os, argparse
from metadata import species from metadata import species
np.seterr(divide = 'ignore') np.seterr(divide = 'ignore')
algos = ['pyin', 'praat', 'crepe', 'tcrepe', 'tcrepe_ft', 'basic', 'pesto'] algos = ['praat', 'tcrepe_ft']
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('specie', type=str, help="Species to treat specifically", default=None) parser.add_argument('specie', type=str, help="Species to treat specifically", default=None)
args = parser.parse_args() args = parser.parse_args()
for specie in species if args.specie is None else [args.specie]: for specie in species if args.specie is None else [args.specie]:
wavpath, FS, nfft, downsample = species[specie].values() wavpath, FS, nfft, downsample, step = species[specie].values()
thrs = pd.read_csv(f'scores/{specie}_scores.csv', index_col=0).threshold thrs = pd.read_csv(f'scores/{specie}_scores.csv', index_col=0).threshold
dt = nfft / 8 / FS # winsize / 8 dt = nfft * step / FS # winsize / 8
# for fn in glob(wavpath): # for fn in glob(wavpath):
def fun(fn): def fun(fn):
# if os.path.isfile(f'annot_pngs/{fn[:-4]}.png'): # if os.path.isfile(f'annot_pngs/{fn[:-4]}.png'):
...@@ -25,7 +25,7 @@ for specie in species if args.specie is None else [args.specie]: ...@@ -25,7 +25,7 @@ for specie in species if args.specie is None else [args.specie]:
# load signal and compute spectrogram # load signal and compute spectrogram
sig, fs = librosa.load(fn, sr=FS) sig, fs = librosa.load(fn, sr=FS)
df = pd.read_csv(f'{fn[:-4]}_preds.csv') df = pd.read_csv(f'{fn[:-4]}_preds.csv')
df.time /= downsample
S, freqs, times, ax = plt.specgram(sig, Fs=FS, NFFT=nfft, noverlap=int(nfft-dt*fs)) S, freqs, times, ax = plt.specgram(sig, Fs=FS, NFFT=nfft, noverlap=int(nfft-dt*fs))
S = 10*np.log10(S+1e-10) S = 10*np.log10(S+1e-10)
plt.scatter(df.dropna(subset='annot').time, df.dropna(subset='annot').annot, c='k', alpha=.2, label='annot') plt.scatter(df.dropna(subset='annot').time, df.dropna(subset='annot').annot, c='k', alpha=.2, label='annot')
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment