Skip to content
Snippets Groups Projects
Commit 208fc5d0 authored by Paul Best's avatar Paul Best
Browse files

added physeter and balaenoptera

parent 87ba81e0
No related branches found
No related tags found
No related merge requests found
...@@ -9,6 +9,6 @@ For example : ...@@ -9,6 +9,6 @@ For example :
`python run_CNN_HB.py file1.wav file2.wav -outfn predictions.pkl` `python run_CNN_HB.py file1.wav file2.wav -outfn predictions.pkl`
This script relies on torch, pandas, numpy, scipy, and tqdm to run. Install dependencies with pip or conda. This script relies on torch, pandas, numpy, scipy, and tqdm to run. Install dependencies with pip or conda.
If a GPU and cuda are available on the current machine, process will run on GPU for faster computation. If a GPU and cuda are available on the current machine, processes will run on GPU for faster computation.
paul.best@univ-tln.fr for more information paul.best@univ-tln.fr for more information
...@@ -2,8 +2,59 @@ from torch import nn ...@@ -2,8 +2,59 @@ from torch import nn
from frontend import STFT, MelFilter, PCENLayer, Log1p from frontend import STFT, MelFilter, PCENLayer, Log1p
class depthwise_separable_conv1d(nn.Module):
    """1-D depthwise-separable convolution.

    A grouped convolution with one filter per input channel (``groups=nin``)
    is followed by a kernel-size-1 convolution that maps ``nin`` channels to
    ``nout`` channels.
    """

    def __init__(self, nin, nout, kernel, padding=0, stride=1):
        super().__init__()
        # Per-channel filtering: kernel, padding and stride all apply here.
        self.depthwise = nn.Conv1d(
            nin,
            nin,
            kernel_size=kernel,
            padding=padding,
            stride=stride,
            groups=nin,
        )
        # Channel mixing only; the length of the sequence is left untouched.
        self.pointwise = nn.Conv1d(nin, nout, kernel_size=1)

    def forward(self, x):
        """Apply the depthwise stage, then the pointwise stage."""
        return self.pointwise(self.depthwise(x))
class Dropout1d(nn.Module):
    """Apply ``nn.Dropout2d`` to an input that lacks the trailing axis it expects.

    A singleton last axis is appended before the dropout layer and removed
    again afterwards, so the output has the same shape as the input.
    """

    def __init__(self, pdropout=.25):
        super().__init__()
        self.dropout = nn.Dropout2d(pdropout)

    def forward(self, x):
        """Return ``x`` after dropout, with its original shape."""
        expanded = x.unsqueeze(-1)
        dropped = self.dropout(expanded)
        return dropped.squeeze(-1)
# Hyper-parameters of the 'physeter' network defined below: number of channels
# produced by its convolution blocks (also the BatchNorm1d width) and the
# kernel size of its depthwise convolutions.
PHYSETER_NFEAT = 128
PHYSETER_KERNEL = 7
# Same two hyper-parameters for the 'balaenoptera' network.
BALAENOPTERA_NFEAT = 128
BALAENOPTERA_KERNEL = 5
get = { get = {
'physeter' : nn.Sequential(
STFT(512, 256),
MelFilter(50000, 512, 64, 2000, 25000),
Log1p(),
depthwise_separable_conv1d(64, PHYSETER_NFEAT, PHYSETER_KERNEL, stride=2),
nn.BatchNorm1d(PHYSETER_NFEAT),
nn.LeakyReLU(),
Dropout1d(),
depthwise_separable_conv1d(PHYSETER_NFEAT, PHYSETER_NFEAT, PHYSETER_KERNEL, stride=2),
nn.BatchNorm1d(PHYSETER_NFEAT),
nn.LeakyReLU(),
Dropout1d(),
depthwise_separable_conv1d(PHYSETER_NFEAT, 1, PHYSETER_KERNEL, stride=2)
),
'balaenoptera': nn.Sequential(
STFT(256, 32),
MelFilter(200, 256, 128, 0, 100),
Log1p(),
depthwise_separable_conv1d(128, BALAENOPTERA_NFEAT, kernel=BALAENOPTERA_KERNEL, padding=BALAENOPTERA_KERNEL//2),
nn.BatchNorm1d(BALAENOPTERA_NFEAT),
nn.LeakyReLU(),
Dropout1d(),
depthwise_separable_conv1d(BALAENOPTERA_NFEAT, BALAENOPTERA_NFEAT, kernel=BALAENOPTERA_KERNEL, padding=BALAENOPTERA_KERNEL//2),
nn.BatchNorm1d(BALAENOPTERA_NFEAT),
nn.LeakyReLU(),
Dropout1d(),
depthwise_separable_conv1d(BALAENOPTERA_NFEAT, 1, kernel=BALAENOPTERA_KERNEL, padding=BALAENOPTERA_KERNEL//2)
),
'megaptera' : nn.Sequential( 'megaptera' : nn.Sequential(
nn.Sequential( nn.Sequential(
STFT(512, 64), STFT(512, 64),
......
...@@ -20,7 +20,6 @@ parser.add_argument('-no-maxPool', dest='maxPool', action='store_false') ...@@ -20,7 +20,6 @@ parser.add_argument('-no-maxPool', dest='maxPool', action='store_false')
parser.set_defaults(maxPool=True) parser.set_defaults(maxPool=True)
args = parser.parse_args() args = parser.parse_args()
meta_model = { meta_model = {
'delphinid': { 'delphinid': {
'stdc': 'sparrow_dolphin_train8_pcen_conv2d_noaugm_bs32_lr.005_.stdc', 'stdc': 'sparrow_dolphin_train8_pcen_conv2d_noaugm_bs32_lr.005_.stdc',
...@@ -31,8 +30,14 @@ meta_model = { ...@@ -31,8 +30,14 @@ meta_model = {
'fs': 11025 'fs': 11025
}, },
'orcinus': '', 'orcinus': '',
'physeter': '', 'physeter': {
'balaenoptera': '' 'stdc': 'stft_depthwise_ovs_128_k7_r1.stdc',
'fs': 50000
},
'balaenoptera': {
'stdc': 'dw_m128_brown_200Hzhps32_prod_w4_128_k5_r_sch97.stdc',
'fs': 200
}
}[args.specie] }[args.specie]
...@@ -42,33 +47,6 @@ def collate_fn(batch): ...@@ -42,33 +47,6 @@ def collate_fn(batch):
def norm(arr):
    """Standardise *arr* to zero mean and unit standard deviation.

    A signal whose standard deviation is zero (e.g. digital silence) is
    returned mean-centred only, instead of being divided by zero and turned
    into NaN values that would propagate through the model.
    """
    centred = arr - np.mean(arr)
    std = np.std(arr)
    # Guard the division: std == 0 would fill the output with NaN.
    return centred / std if std > 0 else centred
def run(folder, stdcfile, model, fs, lensample, batch_size, maxPool):
    """Run ``model`` on the audio found in ``folder`` and collect its predictions.

    Parameters
    ----------
    folder : forwarded to ``Dataset`` as the location of the audio files.
    stdcfile : path of the state-dict file loaded into ``model``.
    model : ``torch.nn.Module`` to evaluate.
    fs, lensample : forwarded unchanged to ``Dataset``
        (presumably sampling rate and window length - confirm against Dataset).
    batch_size : batch size given to the ``DataLoader``.
    maxPool : if true, keep only the maximum of each sample's prediction along
        the last axis; otherwise keep the flattened prediction per sample.

    Returns
    -------
    pandas.DataFrame with the columns ``fn``, ``offset`` and ``pred``
    (empty when no sample could be built from ``folder``).
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # map_location makes weights that were saved from a GPU loadable on a
    # CPU-only machine; without it torch.load tries to restore CUDA tensors.
    model.load_state_dict(torch.load(stdcfile, map_location=device))
    model.eval()
    model.to(device)

    loader = data.DataLoader(
        Dataset(folder, fs, lensample),
        batch_size=batch_size,
        collate_fn=collate_fn,
        num_workers=8,
        prefetch_factor=4,
    )
    if len(loader) == 0:
        # Nothing to iterate over: the loop below is a no-op and an empty
        # frame is returned.
        print('Unable to open any audio file in the given folder')

    fns, offsets, preds = [], [], []
    with torch.no_grad():
        for x, meta in tqdm(loader):
            x = x.to(device)
            pred = model(x).cpu().detach().numpy()
            if maxPool:
                pred = pred.max(axis=-1).reshape(len(x))
            else:
                pred = pred.reshape(len(x), -1)
            fns.extend(meta['fn'])
            offsets.extend(meta['offset'].numpy())
            preds.extend(pred)

    out = pd.DataFrame(columns=['fn', 'offset', 'pred'])
    # Explicit item assignment: attribute-style assignment only sets a column
    # when that column already exists, which is easy to break by accident.
    out['fn'], out['offset'], out['pred'] = fns, offsets, preds
    return out
class Dataset(data.Dataset): class Dataset(data.Dataset):
def __init__(self, folder, fs, lensample): def __init__(self, folder, fs, lensample):
super(Dataset, self) super(Dataset, self)
...@@ -78,10 +56,9 @@ class Dataset(data.Dataset): ...@@ -78,10 +56,9 @@ class Dataset(data.Dataset):
try: try:
duration = sf.info(folder+fn).duration duration = sf.info(folder+fn).duration
except: except:
print(f'Skipping {fn} (unable to read)') print(f'Skipping {fn} (unable to read as audio)')
continue continue
for offset in np.arange(0, duration+.01-lensample, lensample): self.samples.extend([{'fn':fn, 'offset':offset} for offset in np.arange(0, duration+.01-lensample, lensample)])
self.samples.append({'fn':fn, 'offset':offset})
self.fs, self.folder, self.lensample = fs, folder, lensample self.fs, self.folder, self.lensample = fs, folder, lensample
def __len__(self): def __len__(self):
...@@ -101,13 +78,33 @@ class Dataset(data.Dataset): ...@@ -101,13 +78,33 @@ class Dataset(data.Dataset):
sig = norm(sig) sig = norm(sig)
return torch.tensor(sig).float(), sample return torch.tensor(sig).float(), sample
preds = run(args.audio_folder,
meta_model['stdc'],
models.get[args.specie],
meta_model['fs'],
args.lensample,
args.batch_size,
args.maxPool
)
preds.to_pickle(args.pred_fn) # prepare model
model = models.get[args.specie]
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# map_location makes weights that were saved from a GPU loadable on a
# CPU-only machine; without it torch.load tries to restore CUDA tensors.
model.load_state_dict(torch.load(f"weights/{meta_model['stdc']}", map_location=device))
model.eval()
model.to(device)

# prepare data loader and output storage for predictions
loader = data.DataLoader(
    Dataset(args.audio_folder, meta_model['fs'], args.lensample),
    batch_size=args.batch_size,
    collate_fn=collate_fn,
    num_workers=8,
    prefetch_factor=4,
)
if len(loader) == 0:
    # SystemExit with a message prints it to stderr and exits with status 1;
    # the site-provided exit() is not guaranteed to exist and returned 0.
    raise SystemExit('Unable to open any audio file in the given folder')

fns, offsets, preds = [], [], []
with torch.no_grad():
    for x, meta in tqdm(loader):
        x = x.to(device)
        pred = model(x).cpu().detach().numpy()
        if args.maxPool:
            # keep only the highest prediction of each sample
            pred = pred.max(axis=-1).reshape(len(x))
        else:
            # keep the full (flattened) prediction sequence of each sample
            pred = pred.reshape(len(x), -1)
        preds.extend(pred)
        fns.extend(meta['fn'])
        offsets.extend(meta['offset'].numpy())

out = pd.DataFrame(columns=['filename', 'offset', 'prediction'])
# Explicit item assignment: attribute-style assignment only sets a column
# when that column already exists.
out['filename'], out['offset'], out['prediction'] = fns, offsets, preds
out.to_pickle(args.pred_fn)
File added
File added
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment