diff --git a/.gitignore b/.gitignore index 92c288c8878ed50335f579b6219e592643957270..741c5687754bc9341f87f4ffd81cb8d14bba7ccc 100644 --- a/.gitignore +++ b/.gitignore @@ -1,21 +1,8 @@ -LiuLabData/* __pycache__/* -train_set* -hyenas/* -dclmmpa2011/* -wolves/* -orangs/* -Synth_birds_database/* pred_pngs/* annot_pngs/* noisy_pngs/* runs/* -little_auk/* -Lion Roar Data/* -monk parakeet/* -FCPalmae/* -little_owl/* -white_eye/* +data/* pesto-full/* -marcelo/* -acoustic_data_sets.zip +crepe_ft/train_set*.pkl diff --git a/metadata.py b/metadata.py index 3dcfeb2261451d28d83d3aa4722e9ab3e508f315..30291d0a5fcb335914ad7b5d53631defb75a20d0 100644 --- a/metadata.py +++ b/metadata.py @@ -1,86 +1,93 @@ species = { 'wolves':{ - 'wavpath': 'wolves/*/*.wav', + 'wavpath': 'data/wolves/*/*.wav', 'FS': 16000, 'nfft': 1024, 'downsample':1, 'step': 1/8 }, - # 'SynthBirds':{ - # 'wavpath': 'Synth_birds_database/1_original_files/*/*.wav', - # 'FS': 44100, - # 'nfft': 1024, - # 'downsample':1, - # 'step': 1/16 - # }, 'dclde':{ - 'wavpath': 'dclmmpa2011/cut_no_overlap/*.wav', + 'wavpath': 'data/dclmmpa2011/cut_no_overlap/*.wav', 'FS': 192000, 'nfft':1536, # according to silbido paper, 8ms windows and 125Hz resolution 'downsample':20, 'step': 1/8 }, 'hyenas':{ - 'wavpath': 'hyenas/lehmann hyena whoop traces/*.wav', + 'wavpath': 'data/hyenas/lehmann hyena whoop traces/*.wav', 'FS': 8000, 'nfft': 2048, 'downsample':1, 'step': 1/8 }, 'orangs':{ - 'wavpath': 'orangs/*.wav', + 'wavpath': 'data/orangs/*.wav', 'FS': 44100, 'nfft':2048, 'downsample':1, 'step': 1/8 }, 'mice':{ - 'wavpath': 'LiuLabData/cut_no_overlap/*.wav', + 'wavpath': 'data/LiuLabData/cut_no_overlap/*.wav', 'FS': 250000, 'nfft':512, 'downsample':50, 'step': 1/8 }, 'lions':{ - 'wavpath': 'Lion Roar Data/Biologger Roars Expanded (500ms)/*.WAV', + 'wavpath': 'data/Lion Roar Data/Biologger Roars Expanded (500ms)/*.WAV', 'FS':16000, 'nfft':2048, # was zero-padded to 4 times 2048 in the original 'downsample':0.5, 'step': 1/8 }, 'parakeets':{ - 'wavpath': 'monk parakeet/pre-processed_calls/*.WAV', + 'wavpath': 'data/monk parakeet/pre-processed_calls/*.WAV', 'FS':44100, 'nfft':512, 'downsample':3, 'step': 1/16 }, 'palmae':{ - 'wavpath': 'FCPalmae/cut/*.wav', + 'wavpath': 'data/FCPalmae/cut/*.wav', 'FS':44100, 'nfft':1024, 'downsample':5, 'step': 1/16 }, 'little_owl':{ - 'wavpath': 'little_owl/cut/*.wav', + 'wavpath': 'data/little_owl/cut/*.wav', 'FS':4000, 'nfft':512, 'downsample':1, 'step': 1/8 }, 'white_eye':{ - 'wavpath': 'white_eye/cut/*.wav', + 'wavpath': 'data/white_eye/cut/*.wav', 'FS':44100, 'nfft':1024, 'downsample':5, 'step': 1/16 }, 'long_billed':{ - 'wavpath':'marcelo/long_billed_hermit_songs/*.wav', + 'wavpath':'data/marcelo/long_billed_hermit_songs/*.wav', 'FS':44100, 'nfft':512, 'downsample':5, 'step': 1/16 + }, + 'hummingbird':{ + 'wavpath':'data/marcelo/hummingbird_songs/*.wav', + 'FS':44100, + 'nfft':512, + 'downsample':5, + 'step':1/16 + }, + 'bats':{ + 'wavpath':'data/marcelo/spixs_disc_winged_bat_*/*.wav', + 'FS':400000, + 'nfft':512, + 'downsample':20, + 'step':1/16 } } diff --git a/print_annot.py b/print_annot.py index 45ac930c132e139e05ec50d792d5cb1c939eaa30..97aed3e86e8dbc3625968a65c800bdfeaf7f476f 100644 --- a/print_annot.py +++ b/print_annot.py @@ -14,13 +14,12 @@ args = parser.parse_args() for specie in species if args.specie is None else [args.specie]: wavpath, FS, nfft, downsample, step = species[specie].values() dt = nfft * step / FS # winsize / 8 - Hz2bin = lambda f: np.round(f/FS*nfft).astype(int) # for fn in glob(wavpath): def fun(fn): # if os.path.isfile(f'annot_pngs/{fn[:-4]}.png'): # return - if not os.path.isdir(f'annot_pngs/{fn.rsplit("/",1)[0]}'): - os.mkdir(f'annot_pngs/{fn.rsplit("/",1)[0]}') + if not os.path.isdir(f'annot_pngs/{fn[5:].rsplit("/",1)[0]}'): + os.makedirs(f'annot_pngs/{fn[5:].rsplit("/",1)[0]}', exist_ok=True) # load signal and compute spetrogram sig, fs = librosa.load(fn, sr=FS) df = pd.read_csv(f'{fn[:-4]}_preds.csv').dropna(subset='annot') @@ -30,12 +29,13 @@ for specie in species if args.specie is None else [args.specie]: if 'salience' in df.columns: plt.scatter(df.time, df.annot, c=df.salience, s=1 if specie in {'dclde','mice'} else None, cmap='jet', vmin=0, vmax=1) plt.colorbar() - plt.title(f'Salience {df.salience.quantile(.25):.2f}, SHR {df.SHR.quantile(.25):.2f}') + plt.title(f'Salience {df.salience.quantile(.25):.2f}, SHR {df.SHR.quantile(.25):.2f}, Harmonicity {df.harmonicity.quantile(.25):.2f}') else: plt.scatter(df.time, df.annot, alpha=.2, s=1 if specie in {'dclde','mice'} else None) plt.ylim(0, df.annot.max()*1.5) plt.tight_layout() - plt.savefig(f'annot_pngs/{fn[:-4]}.png') + plt.savefig(f'annot_pngs/{fn[5:-4]}.png') plt.close() - p_umap(fun, glob(wavpath), desc=specie) + files = pd.Series(glob(wavpath)) + p_umap(fun, files, desc=specie) diff --git a/print_preds.py b/print_preds.py index 312e766554f498a10394f7d1ff7b32a1f0131e4b..5f724210429d3087241f79713d9f38da8d088e2a 100644 --- a/print_preds.py +++ b/print_preds.py @@ -7,15 +7,15 @@ import os, argparse from metadata import species np.seterr(divide = 'ignore') -algos = ['pyin', 'praat', 'crepe', 'tcrepe', 'tcrepe_ft', 'basic', 'pesto'] +algos = ['praat', 'tcrepe_ft'] parser = argparse.ArgumentParser() parser.add_argument('specie', type=str, help="Species to treat specifically", default=None) args = parser.parse_args() for specie in species if args.specie is None else [args.specie]: - wavpath, FS, nfft, downsample = species[specie].values() + wavpath, FS, nfft, downsample, step = species[specie].values() thrs = pd.read_csv(f'scores/{specie}_scores.csv', index_col=0).threshold - dt = nfft / 8 / FS # winsize / 8 + dt = nfft * step / FS # winsize / 8 # for fn in glob(wavpath): def fun(fn): # if os.path.isfile(f'annot_pngs/{fn[:-4]}.png'): @@ -25,7 +25,7 @@ for specie in species if args.specie is None else [args.specie]: # load signal and compute spetrogram sig, fs = librosa.load(fn, sr=FS) df = pd.read_csv(f'{fn[:-4]}_preds.csv') - df.time /= downsample + S, freqs, times, ax = plt.specgram(sig, Fs=FS, NFFT=nfft, noverlap=int(nfft-dt*fs)) S = 10*np.log10(S+1e-10) plt.scatter(df.dropna(subset='annot').time, df.dropna(subset='annot').annot, c='k', alpha=.2, label='annot')