Commit a429b659 authored by Paul Best

all functional

parent 208fc5d0
@@ -8,7 +8,7 @@ class depthwise_separable_conv1d(nn.Module):
        self.depthwise = nn.Conv1d(nin, nin, kernel_size=kernel, padding=padding, stride=stride, groups=nin)
        self.pointwise = nn.Conv1d(nin, nout, kernel_size=1)

    def forward(self, x):
-        out = self.depthwise(x)
+        out = self.depthwise(x.squeeze(1))
        out = self.pointwise(out)
        return out
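For context, a depthwise separable convolution factors a dense convolution into a per-channel (grouped) convolution followed by a 1x1 pointwise mix, which cuts the parameter count sharply. A minimal sketch of the saving, with illustrative sizes not taken from this repo:

import torch.nn as nn

nin, nout, k = 64, 128, 7
dense = nn.Conv1d(nin, nout, k)                 # nin*nout*k weights + nout biases
depthwise = nn.Conv1d(nin, nin, k, groups=nin)  # nin*k weights + nin biases
pointwise = nn.Conv1d(nin, nout, 1)             # nin*nout weights + nout biases

count = lambda m: sum(p.numel() for p in m.parameters())
print(count(dense))                         # 57472
print(count(depthwise) + count(pointwise))  # 8832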
@@ -27,10 +27,13 @@ BALAENOPTERA_NFEAT = 128
BALAENOPTERA_KERNEL = 5

get = {
-    'physeter' : nn.Sequential(
+    'physeter': {
+        'weights': 'stft_depthwise_ovs_128_k7_r1.stdc',
+        'fs': 50000,
+        'archi': nn.Sequential(
            STFT(512, 256),
            MelFilter(50000, 512, 64, 2000, 25000),
-            Log1p(),
+            Log1p(trainable=True),
            depthwise_separable_conv1d(64, PHYSETER_NFEAT, PHYSETER_KERNEL, stride=2),
            nn.BatchNorm1d(PHYSETER_NFEAT),
            nn.LeakyReLU(),
@@ -41,10 +44,14 @@ get = {
            Dropout1d(),
            depthwise_separable_conv1d(PHYSETER_NFEAT, 1, PHYSETER_KERNEL, stride=2)
        ),
-    'balaenoptera': nn.Sequential(
+    },
+    'balaenoptera': {
+        'weights': 'dw_m128_brown_200Hzhps32_prod_w4_128_k5_r_sch97.stdc',
+        'fs': 200,
+        'archi': nn.Sequential(
            STFT(256, 32),
            MelFilter(200, 256, 128, 0, 100),
-            Log1p(),
+            Log1p(trainable=True),
            depthwise_separable_conv1d(128, BALAENOPTERA_NFEAT, kernel=BALAENOPTERA_KERNEL, padding=BALAENOPTERA_KERNEL//2),
            nn.BatchNorm1d(BALAENOPTERA_NFEAT),
            nn.LeakyReLU(),
@@ -54,8 +61,12 @@ get = {
            nn.LeakyReLU(),
            Dropout1d(),
            depthwise_separable_conv1d(BALAENOPTERA_NFEAT, 1, kernel=BALAENOPTERA_KERNEL, padding=BALAENOPTERA_KERNEL//2)
-    ),
-    'megaptera' : nn.Sequential(
+        )
+    },
+    'megaptera': {
+        'weights': 'sparrow_whales_train8C_2610_frontend2_conv1d_noaugm_bs32_lr.05_.stdc',
+        'fs': 11025,
+        'archi': nn.Sequential(
            nn.Sequential(
                STFT(512, 64),
                MelFilter(11025, 512, 64, 100, 3000),
@@ -90,8 +101,12 @@ get = {
                nn.Dropout(p=.5),
                nn.Conv2d(64, 1, 1, bias=False)
            )
-    ),
-    'delphinid' : nn.Sequential(
+        )
+    },
+    'delphinid': {
+        'weights': 'sparrow_dolphin_train8_pcen_conv2d_noaugm_bs32_lr.005_.stdc',
+        'fs': 96000,
+        'archi': nn.Sequential(
            nn.Sequential(
                STFT(4096, 1024),
                MelFilter(96000, 4096, 128, 3000, 30000),
@@ -128,4 +143,46 @@ get = {
                nn.MaxPool2d((6, 1))
            )
        )
+    },
+    'orcinus': {
+        'weights': 'train_fe76f_00085_85_0',
+        'fs': 22050,
+        'archi': nn.Sequential(
+            nn.Sequential(
+                STFT(1024, 128),
+                MelFilter(22050, 1024, 80, 300, 11025),
+                PCENLayer(80)
+            ),
+            nn.Sequential(
+                nn.Conv2d(1, 32, 3, bias=False),
+                nn.BatchNorm2d(32),
+                nn.LeakyReLU(0.01),
+                nn.Conv2d(32, 32, 3, bias=False),
+                nn.BatchNorm2d(32),
+                nn.MaxPool2d(3),
+                nn.LeakyReLU(0.01),
+                nn.Conv2d(32, 32, 3, bias=False),
+                nn.BatchNorm2d(32),
+                nn.LeakyReLU(0.01),
+                nn.Conv2d(32, 32, 3, bias=False),
+                nn.BatchNorm2d(32),
+                nn.LeakyReLU(0.01),
+                nn.Conv2d(32, 64, (19, 3), bias=False),
+                nn.BatchNorm2d(64),
+                nn.MaxPool2d(3),
+                nn.LeakyReLU(0.01),
+                nn.Dropout2d(p=.5),
+                nn.Conv2d(64, 256, (1, 9), bias=False),  # for 80 bands
+                nn.BatchNorm2d(256),
+                nn.LeakyReLU(0.01),
+                nn.Dropout2d(p=.5),
+                nn.Conv2d(256, 64, 1, bias=False),
+                nn.BatchNorm2d(64),
+                nn.Dropout2d(p=.5),
+                nn.LeakyReLU(0.01),
+                nn.Conv2d(64, 1, 1, bias=False),
+                nn.AdaptiveMaxPool2d(output_size=(1, 1))
+            )
+        )
+    }
}
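Each entry of models.get now bundles the architecture with its weight file and expected sample rate, which lets the inference script below drop its own meta_model table. A minimal loading sketch, assuming the repo's weights/ folder:

import torch
import models

cfg = models.get['physeter']    # {'weights': ..., 'fs': 50000, 'archi': nn.Sequential(...)}
model = cfg['archi']
model.load_state_dict(torch.load(f"weights/{cfg['weights']}"))
model.eval()                    # expects audio sampled at cfg['fs'] Hz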
@@ -1,9 +1,8 @@
import os
import torch
import models
-from scipy import signal
+from scipy import signal, special
import soundfile as sf
-from torch.utils import data
import numpy as np
import pandas as pd
from tqdm import tqdm
@@ -12,7 +11,6 @@ import argparse
parser = argparse.ArgumentParser(description="Run this script to use a CNN for inference on a folder of audio files.")
parser.add_argument('audio_folder', type=str, help='Path of the folder with audio files to process')
parser.add_argument('specie', type=str, help='Target species to detect', choices=['megaptera', 'delphinid', 'orcinus', 'physeter', 'balaenoptera'])
-parser.add_argument('pred_fn', type=str, help='Filename for the output table containing model predictions')
parser.add_argument('-lensample', type=float, help='Length of the signal excerpts to process (sec)', default=5)
parser.add_argument('-batch_size', type=int, help='Number of samples to process at a time', default=32)
parser.add_argument('-maxPool', help='Whether to keep only the maximal prediction of a sample or the full sequence', action='store_true')
@@ -20,55 +18,32 @@ parser.add_argument('-no-maxPool', dest='maxPool', action='store_false')
parser.set_defaults(maxPool=True)
args = parser.parse_args()
-meta_model = {
-    'delphinid': {
-        'stdc': 'sparrow_dolphin_train8_pcen_conv2d_noaugm_bs32_lr.005_.stdc',
-        'fs': 96000
-    },
-    'megaptera': {
-        'stdc': 'sparrow_whales_train8C_2610_frontend2_conv1d_noaugm_bs32_lr.05_.stdc',
-        'fs': 11025
-    },
-    'orcinus': '',
-    'physeter': {
-        'stdc': 'stft_depthwise_ovs_128_k7_r1.stdc',
-        'fs': 50000
-    },
-    'balaenoptera': {
-        'stdc': 'dw_m128_brown_200Hzhps32_prod_w4_128_k5_r_sch97.stdc',
-        'fs': 200
-    }
-}[args.specie]

def collate_fn(batch):
    batch = list(filter(lambda x: x is not None, batch))
-    return data.dataloader.default_collate(batch) if len(batch) > 0 else None
+    return torch.utils.data.dataloader.default_collate(batch) if len(batch) > 0 else None
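collate_fn filters out items that failed to load (returned None) before the default collation, so one unreadable excerpt does not abort its whole batch. A small illustration with hypothetical samples:

import torch
from torch.utils.data.dataloader import default_collate

# two valid (signal, metadata) pairs and one failed read
batch = [(torch.zeros(10), {'offset': 0.}), None, (torch.ones(10), {'offset': 5.})]
batch = list(filter(lambda x: x is not None, batch))
x, meta = default_collate(batch)
print(x.shape)         # torch.Size([2, 10])
print(meta['offset'])  # tensor([0., 5.], dtype=torch.float64)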
norm = lambda arr: (arr - np.mean(arr)) / np.std(arr)
-class Dataset(data.Dataset):
+# Pytorch dataset class to load audio samples
+class Dataset(torch.utils.data.Dataset):
    def __init__(self, folder, fs, lensample):
        super(Dataset, self)
-        print('initializing dataset...')
+        self.fs, self.folder, self.lensample = fs, folder, lensample
        self.samples = []
-        for fn in os.listdir(folder):
+        for fn in tqdm(os.listdir(folder), desc='Dataset initialization', leave=False):
            try:
-                duration = sf.info(folder+fn).duration
+                info = sf.info(folder+fn)
+                duration, fs = info.duration, info.samplerate
+                self.samples.extend([{'fn':fn, 'offset':offset, 'fs':fs} for offset in np.arange(0, duration+.01-lensample, lensample)])
            except:
+                print(f'Skipping {fn} (unable to read as audio)')
                continue
-            self.samples.extend([{'fn':fn, 'offset':offset} for offset in np.arange(0, duration+.01-lensample, lensample)])
-        self.fs, self.folder, self.lensample = fs, folder, lensample

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        sample = self.samples[idx]
-        fs = sf.info(self.folder+sample['fn']).samplerate
        try:
-            sig, fs = sf.read(self.folder+sample['fn'], start=int(sample['offset']*fs), stop=int((sample['offset']+self.lensample)*fs), always_2d=True)
+            sig, fs = sf.read(self.folder+sample['fn'], start=int(sample['offset']*sample['fs']), stop=int((sample['offset']+self.lensample)*sample['fs']), always_2d=True)
        except:
            print('Failed loading '+sample['fn'])
            return None
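Recording each file's sample rate at init lets __getitem__ turn second offsets into frame indices in a single sf.read call, instead of querying sf.info again for every item. A standalone sketch of that read, with a hypothetical file name and rate:

import soundfile as sf

fs, offset, lensample = 50000, 10.0, 5.0  # hypothetical values
sig, fs = sf.read('example.wav', start=int(offset * fs),
                  stop=int((offset + lensample) * fs), always_2d=True)
# sig.shape == (int(lensample * fs), n_channels)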
@@ -80,24 +55,27 @@ class Dataset(data.Dataset):
# prepare model
-model = models.get[args.specie]
-model.load_state_dict(torch.load(f"weights/{meta_model['stdc']}"))
+model = models.get[args.specie]['archi']
+model.load_state_dict(torch.load(f"weights/{models.get[args.specie]['weights']}"))
model.eval()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# prepare data loader and output storage for predictions
-loader = data.DataLoader(Dataset(args.audio_folder, meta_model['fs'], args.lensample), batch_size=args.batch_size, collate_fn=collate_fn, num_workers=8, prefetch_factor=4)
-out = pd.DataFrame(columns=['filename', 'offset', 'prediction'])
-fns, offsets, preds = [], [], []
+loader = torch.utils.data.DataLoader(Dataset(args.audio_folder, models.get[args.specie]['fs'], args.lensample),
+                                     batch_size=args.batch_size, collate_fn=collate_fn, num_workers=8, prefetch_factor=4)

if len(loader) == 0:
-    print('Unable to open any audio file in the given folder')
+    print(f'Unable to open any audio file in the given folder {args.audio_folder}')
    exit()

+out = pd.DataFrame(columns=['filename', 'offset', 'prediction'])
+fns, offsets, preds = [], [], []
+
+# forward the model on each batch
with torch.no_grad():
-    for x, meta in tqdm(loader):
+    for x, meta in tqdm(loader, desc='Model inference'):
        x = x.to(device)
-        pred = model(x).cpu().detach().numpy()
+        pred = special.expit(model(x).cpu().detach().numpy())
        if args.maxPool:
            pred = pred.max(axis=-1).reshape(len(x))
        else:
@@ -107,4 +85,9 @@ with torch.no_grad():
        offsets.extend(meta['offset'].numpy())

out.filename, out.offset, out.prediction = fns, offsets, preds
-out.to_pickle(args.pred_fn)
+pred_fn = list(filter(lambda e: e != '', args.audio_folder.split('/')))[-1] + ('.csv' if args.maxPool else '.pkl')
+print(f'Saving results into {pred_fn}')
+if args.maxPool:
+    out.to_csv(pred_fn, index=False)
+else:
+    out.to_pickle(pred_fn)
\ No newline at end of file
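The model now emits raw logits, so scipy.special.expit (the logistic sigmoid) maps predictions into [0, 1], and the output file name is derived from the audio folder instead of the removed pred_fn argument. A quick check of both behaviours, with a hypothetical path:

import numpy as np
from scipy import special

print(special.expit(np.array([-2., 0., 2.])))  # approx. [0.12, 0.5, 0.88]

audio_folder = '/data/recordings/'  # hypothetical path
pred_fn = list(filter(lambda e: e != '', audio_folder.split('/')))[-1] + '.csv'
print(pred_fn)                      # recordings.csv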
No preview for this file type (binary weight files)
File added