Skip to content
Snippets Groups Projects
Commit fe3ac67b authored by Paul Best's avatar Paul Best
Browse files

update

parent 2a15cf14
No related branches found
No related tags found
No related merge requests found
This diff is collapsed.
......@@ -5,6 +5,16 @@ import utils as u
from filterbank import STFT, MelFilter, Log1p
meta = {
'gibbon':{
'sr':9600,
'nfft':1024,
'sampleDur':8
},
'meerkat':{
'sr':8000,
'nfft':256,
'sampleDur':0.2
},
'zebra_finch':{
'sr': 44100,
'nfft': 1024,
......@@ -12,12 +22,12 @@ meta = {
},
'bengalese_finch1':{
'sr': 32000,
'nfft': 512,
'nfft': 256,
'sampleDur': 0.1
},
'bengalese_finch2':{
'sr': 32000,
'nfft': 512,
'nfft': 256,
'sampleDur': 0.1
},
'black-headed_grosbeaks':{
......@@ -87,6 +97,13 @@ frontend = {
nn.InstanceNorm2d(1),
u.Croper2D(n_mel, 128)
),
'logMel_norm': lambda sr, nfft, sampleDur, n_mel : nn.Sequential(
STFT(nfft, int((sampleDur*sr - nfft)/128)),
MelFilter(sr, nfft, n_mel, 0, sr//2),
Log1p(7, trainable=False),
u.Norm(),
u.Croper2D(n_mel, 128)
),
'logMel_vggish': lambda sr, nfft, sampleDur, n_mel : nn.Sequential(
STFT(nfft, int((sampleDur*sr - nfft)/96)),
MelFilter(sr, nfft, n_mel, 0, sr//2),
......
"""Compute wav2vec2 (HEAR baseline) scene embeddings for one species'
vocalisations, cache them to disk, and (when re-enabled) cluster the cached
embeddings with HDBSCAN and score the clustering against expert labels (NMI).
"""
from sklearn import metrics
import matplotlib.pyplot as plt
import umap, hdbscan  # hdbscan is used by the clustering section below; was commented out
from tqdm import tqdm
import argparse, os
import models, utils as u
import pandas as pd, numpy as np, torch
from hearbaseline import wav2vec2 as hear

parser = argparse.ArgumentParser()
parser.add_argument("specie", type=str)            # species folder name (e.g. 'gibbon')
parser.add_argument("-cuda", type=int, default=0)  # CUDA device index
args = parser.parse_args()

df = pd.read_csv(f'{args.specie}/{args.specie}.csv')  # one row per vocalisation
meta = models.meta[args.specie]                       # per-species sr / nfft / sampleDur
batch_size = 32

# BUGFIX: the load path previously used 'wave2vec2' while the save path used
# 'wav2vec2', so the cached-encodings branch could never find the file it
# saved. Both now use 'wav2vec2'.
if True : #not os.path.isfile(f'{args.specie}/encodings/encodings_wav2vec2.npy'):
    gpu = torch.device(f'cuda:{args.cuda}')
    model = hear.load_model().to(gpu)
    # u.Dataset resamples each clip to the embedding model's sample rate.
    loader = torch.utils.data.DataLoader(u.Dataset(df, f'{args.specie}/audio/', model.sample_rate, meta['sampleDur']), batch_size=batch_size, num_workers=8, collate_fn=u.collate_fn)
    with torch.inference_mode():
        encodings, idxs = [], []
        for x, idx in tqdm(loader, desc='test '+args.specie, leave=False):
            encoding = hear.get_scene_embeddings(x.to(gpu), model=model)
            idxs.extend(idx.numpy())
            encodings.extend(encoding.view(len(x), -1).cpu().numpy())
    idxs, encodings = np.array(idxs), np.stack(encodings)
    # X = umap.UMAP(n_jobs=-1, n_components=8).fit_transform(encodings)
    np.save(f'{args.specie}/encodings/encodings_wav2vec2.npy', {'idxs':idxs, 'encodings':encodings}) #, 'umap8':X})
    exit()
else:
    # Cached path: reload embeddings and their 8-D UMAP projection.
    dic = np.load(f'{args.specie}/encodings/encodings_wav2vec2.npy', allow_pickle=True).item()
    idxs, encodings, X = dic['idxs'], dic['encodings'], dic['umap8']

# NOTE(review): unreachable while the branch above is 'if True' (it exits);
# kept for when the cached-encodings path is re-enabled.
clusters = hdbscan.HDBSCAN(min_cluster_size=10, min_samples=3, cluster_selection_epsilon=0.1, core_dist_n_jobs=-1, cluster_selection_method='leaf').fit_predict(X)
df.loc[idxs, 'cluster'] = clusters.astype(int)
mask = ~df.loc[idxs].label.isna()  # score only the labelled samples
clusters, labels = clusters[mask], df.loc[idxs[mask]].label
print('NMI', metrics.normalized_mutual_info_score(labels, clusters))
exit()

# Dead code below (both paths exit() before this point): projection plots and
# per-label precision/recall report, kept for manual re-activation.
#print('Found clusters : \n', pd.Series(clusters).value_counts())
plt.figure(figsize=(20, 10))
plt.scatter(X[clusters==-1,0], X[clusters==-1,1], s=2, alpha=.2, color='Grey')  # noise points in grey
plt.scatter(X[clusters!=-1,0], X[clusters!=-1,1], s=2, c=clusters[clusters!=-1], cmap='tab20')
plt.tight_layout()
plt.savefig(f'{args.specie}/projections/vggish_projection_clusters.png')
plt.figure(figsize=(20, 10))
plt.scatter(X[~mask,0], X[~mask,1], s=2, alpha=.2, color='Grey')
for l, grp in df.groupby('label'):
    plt.scatter(X[df.loc[idxs].label==l, 0], X[df.loc[idxs].label==l, 1], s=4, label=l)
plt.legend()
plt.tight_layout()
plt.savefig(f'{args.specie}/projections/vggish_projection_labels.png')
clusters, labels = clusters[mask], df.loc[idxs[mask]].label
print('Silhouette', metrics.silhouette_score(encodings[mask], clusters))
print('NMI', metrics.normalized_mutual_info_score(labels, clusters))
print('Homogeneity', metrics.homogeneity_score(labels, clusters))
print('Completeness', metrics.completeness_score(labels, clusters))
print('V-Measure', metrics.v_measure_score(labels, clusters))
labelled = df[~df.label.isna()]
for l, grp in labelled.groupby('label'):
    # cluster where this label reaches its best precision
    best = (grp.groupby('cluster').fn.count() / labelled.groupby('cluster').fn.count()).idxmax()
    print(f'Best precision for {l} is for cluster {best} with {(df.cluster==best).sum()} points, \
with precision {((labelled.cluster==best)&(labelled.label==l)).sum()/(labelled.cluster==best).sum():.2f} and recall {((labelled.cluster==best)&(labelled.label==l)).sum()/(labelled.label==l).sum():.2f}')
import matplotlib.pyplot as plt
#import matplotlib.pyplot as plt
import models, utils as u
import pandas as pd, numpy as np, torch
import argparse, os
......@@ -10,21 +10,26 @@ torch.multiprocessing.set_sharing_strategy('file_system')
parser = argparse.ArgumentParser()
parser.add_argument("specie", type=str)
parser.add_argument("-bottleneck", type=int, default=16)
parser.add_argument("-bottleneck", type=int, default=256)
parser.add_argument("-nMel", type=int, default=128)
parser.add_argument("-prcptl", type=int, default=1)
parser.add_argument("-encoder", type=str, default='sparrow_encoder')
parser.add_argument("-frontend", type=str, default='logMel')
args = parser.parse_args()
modelname = f'{args.specie}_{args.bottleneck}_{args.frontend}{args.nMel if "Mel" in args.frontend else ""}_{args.encoder}_decod2_BN_nomaxPool.stdc'
modelname = f'{args.specie}_{args.bottleneck}_{args.frontend}{args.nMel if "Mel" in args.frontend else ""}_{args.encoder}_decod2_BN_nomaxPool{"_noprcptl" if args.prcptl==0 else ""}.stdc'
meta = models.meta[args.specie]
df = pd.read_csv(f'{args.specie}/{args.specie}.csv')
print(f'Tests for model {modelname}')
print(f'{len(df)} available vocs')
if os.path.isfile(f'{args.specie}/encodings/encodings_{modelname[:-4]}npy'):
dic = np.load(f'{args.specie}/encodings/encodings_{modelname[:-4]}npy', allow_pickle=True).item()
idxs, encodings, X = dic['idxs'], dic['encodings'], dic['umap']
X = None
fn = f'{args.specie}/encodings/encodings_' + (modelname[:-5] if not args.frontend in ['vggish', 'biosound', 'spec32', 'openl3', 'wav2vec2', 'crepe'] else args.frontend) +'.npy'
if os.path.isfile(fn):
dic = np.load(fn, allow_pickle=True).item()
idxs, encodings = dic['idxs'], dic['encodings']
if 'umap8' in dic.keys():
print('\o/')
X = dic['umap8']
else:
gpu = torch.device('cuda')
frontend = models.frontend[args.frontend](meta['sr'], meta['nfft'], meta['sampleDur'], args.nMel)
......@@ -42,15 +47,31 @@ else:
encodings.extend(encoding.cpu().detach())
idxs, encodings = np.array(idxs), np.stack(encodings)
X = umap.UMAP(n_jobs=-1).fit_transform(encodings)
np.save(f'{args.specie}/encodings/encodings_{modelname[:-4]}npy', {'idxs':idxs, 'encodings':encodings, 'umap':X})
X = umap.UMAP(n_jobs=-1, n_components=8).fit_transform(encodings)
np.save(f'{args.specie}/encodings/encodings_{modelname[:-4]}npy', {'idxs':idxs, 'encodings':encodings, 'umap8':X})
clusters = hdbscan.HDBSCAN(min_cluster_size=len(df)//100, min_samples=5, core_dist_n_jobs=-1, cluster_selection_method='eom').fit_predict(X)
#clusters = hdbscan.HDBSCAN(min_cluster_size=20, core_dist_n_jobs=-1, cluster_selection_method='leaf').fit_predict(X)
# clusters = hdbscan.HDBSCAN(min_cluster_size=10, min_samples=3, cluster_selection_epsilon=0.05, core_dist_n_jobs=-1, cluster_selection_method='leaf').fit_predict(X)
# clusters = hdbscan.HDBSCAN(min_cluster_size=len(df)//100, min_samples=5, core_dist_n_jobs=-1, cluster_selection_method='eom').fit_predict(X)
#clusters = hdbscan.HDBSCAN(min_cluster_size=10, core_dist_n_jobs=-1, cluster_selection_method='leaf').fit_predict(X)
if X is None:
X = umap.UMAP(n_jobs=-1, n_components=8).fit_transform(encodings)
print('/!\ no UMAP')
clusters = hdbscan.HDBSCAN(min_cluster_size=10, min_samples=3, cluster_selection_epsilon=.1, cluster_selection_method='leaf', core_dist_n_jobs=-1).fit_predict(X)
df.loc[idxs, 'cluster'] = clusters.astype(int)
mask = ~df.loc[idxs].label.isna()
if True:
clusters, labels = clusters[mask], df.loc[idxs[mask]].label
print('Silhouette', metrics.silhouette_score(encodings[mask], clusters))
nmi = metrics.normalized_mutual_info_score(labels, clusters)
print('NMI', nmi)
f = open('tests.csv', mode='a')
frontend = f'{args.bottleneck}_{args.frontend}{args.nMel if "Mel" in args.frontend else ""}'
f.writelines(f'{args.specie},{frontend}{"_noprcptl" if args.prcptl==0 else ""},{nmi},10,3,0.1,leaf,8\n')
f.close()
exit()
#print('Found clusters : \n', pd.Series(clusters).value_counts())
plt.figure(figsize=(20, 10))
......
"""Evaluate the cross-species ('ALL') auto-encoder on one species:
encode its vocalisations with the trained encoder, project to 8-D with UMAP,
cluster with HDBSCAN, and append the NMI score against expert labels to
tests.csv.
"""
import matplotlib.pyplot as plt
import models, utils as u
import pandas as pd, numpy as np, torch
import argparse, os
from tqdm import tqdm
from sklearn import metrics
import umap, hdbscan
torch.multiprocessing.set_sharing_strategy('file_system')

parser = argparse.ArgumentParser()
parser.add_argument("specie", type=str)                      # species folder name
parser.add_argument("-bottleneck", type=int, default=256)    # AE bottleneck size
parser.add_argument("-nMel", type=int, default=128)          # mel bins of the frontend
parser.add_argument("-prcptl", type=int, default=1)          # 1 = model trained with perceptual (VGG) loss
parser.add_argument("-encoder", type=str, default='sparrow_encoder')
parser.add_argument("-frontend", type=str, default='logMel')
args = parser.parse_args()

#modelname = f'ALLbut_{args.specie}_{args.bottleneck}_{args.frontend}{args.nMel if "Mel" in args.frontend else ""}_{args.encoder}_decod2_BN_nomaxPool{"_noprcptl" if args.prcptl==0 else ""}.stdc'
modelname = f'ALL_{args.bottleneck}_{args.frontend}{args.nMel if "Mel" in args.frontend else ""}_{args.encoder}_decod2_BN_nomaxPool{"_noprcptl" if args.prcptl==0 else ""}.stdc'
meta = models.meta[args.specie]
df = pd.read_csv(f'{args.specie}/{args.specie}.csv')
print(f'Tests for model {modelname}')
print(f'{len(df)} available vocs')

X = None
# Baseline frontends use their own fixed encoding files; AE models are keyed by modelname.
fn = f'{args.specie}/encodings/encodings_' + (modelname[:-5] if args.frontend not in ['vggish', 'biosound', 'spec32', 'openl3'] else args.frontend) +'.npy'
if os.path.isfile(fn):
    # Reuse cached encodings (and their 8-D UMAP projection when present).
    dic = np.load(fn, allow_pickle=True).item()
    idxs, encodings = dic['idxs'], dic['encodings']
    if 'umap8' in dic.keys():
        print('\\o/')  # cached projection found ('\o/' was an invalid escape sequence)
        X = dic['umap8']
else:
    # Encode from scratch with the encoder half of the trained auto-encoder.
    gpu = torch.device('cuda')
    frontend = models.frontend[args.frontend](meta['sr'], meta['nfft'], meta['sampleDur'], args.nMel).to(gpu)
    encoder = models.__dict__[args.encoder](*((args.bottleneck // 16, (4, 4)) if args.nMel == 128 else (args.bottleneck // 8, (2, 4))))
    decoder = models.sparrow_decoder(args.bottleneck, (4, 4) if args.nMel == 128 else (2, 4))
    model = torch.nn.Sequential(encoder, decoder).to(gpu)
    # model.load_state_dict(torch.load(f'{args.specie}/weights/{modelname}'))
    model.load_state_dict(torch.load(f'{modelname}'))  # the ALL model lives at the repo root
    model.eval()
    loader = torch.utils.data.DataLoader(u.Dataset(df, f'{args.specie}/audio/', meta['sr'], meta['sampleDur']), batch_size=64, shuffle=True, num_workers=8, collate_fn=u.collate_fn)
    with torch.no_grad():
        encodings, idxs = [], []
        for x, idx in tqdm(loader, desc='test '+args.specie, leave=False):
            encoding = model[0](frontend(x.to(gpu)))  # encoder half only
            idxs.extend(idx)
            encodings.extend(encoding.cpu().detach())
    idxs, encodings = np.array(idxs), np.stack(encodings)
    X = umap.UMAP(n_jobs=-1, n_components=8).fit_transform(encodings)
    np.save(f'{args.specie}/encodings/encodings_{modelname[:-4]}npy', {'idxs':idxs, 'encodings':encodings, 'umap8':X})

# clusters = hdbscan.HDBSCAN(min_cluster_size=len(df)//100, min_samples=5, core_dist_n_jobs=-1, cluster_selection_method='eom').fit_predict(X)
#clusters = hdbscan.HDBSCAN(min_cluster_size=10, core_dist_n_jobs=-1, cluster_selection_method='leaf').fit_predict(X)
if X is None:  # cached file predates the 'umap8' key
    X = umap.UMAP(n_jobs=-1, n_components=8).fit_transform(encodings)
clusters = hdbscan.HDBSCAN(min_cluster_size=10, min_samples=3, cluster_selection_epsilon=.1, cluster_selection_method='leaf', core_dist_n_jobs=-1).fit_predict(X)
df.loc[idxs, 'cluster'] = clusters.astype(int)
mask = ~df.loc[idxs].label.isna()  # restrict metrics to labelled samples
if True:
    clusters, labels = clusters[mask], df.loc[idxs[mask]].label
    print('Silhouette', metrics.silhouette_score(encodings[mask], clusters))
    nmi = metrics.normalized_mutual_info_score(labels, clusters)
    print('NMI', nmi)
    frontend = f'all_AE_{args.bottleneck}_{args.frontend}{args.nMel if "Mel" in args.frontend else ""}'
    # Append one result row; 'with' guarantees the handle is closed (was a bare open/close).
    with open('tests.csv', mode='a') as f:
        f.write(f'{args.specie},{frontend}{"_noprcptl" if args.prcptl==0 else ""},{nmi},10,3,0.1,leaf,8\n')
    exit()

# Dead code below (the branch above always exits): projection plots and
# per-label cluster precision/recall report, kept for manual re-activation.
#print('Found clusters : \n', pd.Series(clusters).value_counts())
plt.figure(figsize=(20, 10))
plt.scatter(X[clusters==-1,0], X[clusters==-1,1], s=2, alpha=.2, color='Grey')  # noise points in grey
plt.scatter(X[clusters!=-1,0], X[clusters!=-1,1], s=2, c=clusters[clusters!=-1], cmap='tab20')
plt.tight_layout()
plt.savefig(f'{args.specie}/projections/{modelname[:-5]}_projection_clusters.png')
plt.figure(figsize=(20, 10))
plt.scatter(X[~mask,0], X[~mask,1], s=2, alpha=.2, color='Grey')
for l, grp in df.groupby('label'):
    plt.scatter(X[df.loc[idxs].label==l, 0], X[df.loc[idxs].label==l, 1], s=4, label=l)
plt.legend()
plt.tight_layout()
plt.savefig(f'{args.specie}/projections/{modelname[:-5]}_projection_labels.png')
clusters, labels = clusters[mask], df.loc[idxs[mask]].label
print('Silhouette', metrics.silhouette_score(encodings[mask], clusters))
print('NMI', metrics.normalized_mutual_info_score(labels, clusters))
print('Homogeneity', metrics.homogeneity_score(labels, clusters))
print('Completeness', metrics.completeness_score(labels, clusters))
print('V-Measure', metrics.v_measure_score(labels, clusters))
labelled = df[~df.label.isna()]
goodClusters = []
for l, grp in labelled.groupby('label'):
    precisions = grp.groupby('cluster').fn.count() / labelled.groupby('cluster').fn.count()
    best = precisions.idxmax()
    goodClusters.extend(precisions[precisions > 0.9].index)  # clusters dominated (>90%) by this label
    print(f'Best precision for {l} is for cluster {best} with {(df.cluster==best).sum()} points, \
with precision {((labelled.cluster==best)&(labelled.label==l)).sum()/(labelled.cluster==best).sum():.2f}\
and recall {((labelled.cluster==best)&(labelled.label==l)).sum()/(labelled.label==l).sum():.2f}')
print(f'{len(goodClusters)} clusters would sort {df.cluster.isin(goodClusters).sum()/len(df)*100:.0f}% of samples')
print(f'{len(goodClusters)/df.label.nunique():.1f} cluster per label in avg)')
# Train a single shared auto-encoder jointly on ALL species listed in
# good_species.txt, using a VGG16 perceptual loss between input and
# reconstructed spectrograms. Logs loss and reconstruction images to
# TensorBoard and periodically checkpoints the weights.
from torchvision.utils import make_grid
from torch.utils.tensorboard import SummaryWriter
import torch
import numpy as np, pandas as pd
import utils as u, models
from tqdm import tqdm
import os, argparse, warnings
torch.multiprocessing.set_sharing_strategy('file_system')
warnings.filterwarnings("error")  # promote warnings to exceptions so bad batches fail loudly
parser = argparse.ArgumentParser()
parser.add_argument("specie", type=str)  # only used for the checkpoint destination folder below
parser.add_argument("-bottleneck", type=int, default=16)
parser.add_argument("-frontend", type=str, default='logMel')
parser.add_argument("-encoder", type=str, default='sparrow_encoder')
parser.add_argument("-nMel", type=int, default=128)
parser.add_argument("-lr", type=float, default=3e-3)
parser.add_argument("-lr_decay", type=float, default=1e-2)
# NOTE(review): -batch_size is parsed but never used; the loaders below hard-code batch_size=20.
parser.add_argument("-batch_size", type=int, default=128)
parser.add_argument("-cuda", type=int, default=0)
args = parser.parse_args()
species = np.loadtxt('good_species.txt', dtype=str)
# Model name encodes all hyper-parameters; the leave-one-out variant is kept commented.
modelname = f'ALL_{args.bottleneck}_{args.frontend}{args.nMel if "Mel" in args.frontend else ""}_{args.encoder}_decod2_BN_nomaxPool.stdc'
#modelname = f'ALLbut_{args.specie}_{args.bottleneck}_{args.frontend}{args.nMel if "Mel" in args.frontend else ""}_{args.encoder}_decod2_BN_nomaxPool.stdc'
gpu = torch.device(f'cuda:{args.cuda}')
writer = SummaryWriter(f'runs3/{modelname}')
os.system(f'cp *.py runs3/{modelname}')  # snapshot the source files alongside the run logs
vgg16 = models.vgg16
vgg16.eval().to(gpu)  # frozen feature extractor for the perceptual loss
# One spectrogram frontend per species (each has its own sr / nfft / sampleDur).
frontends = [models.frontend[args.frontend](models.meta[s]['sr'], models.meta[s]['nfft'], models.meta[s]['sampleDur'], args.nMel).to(gpu) for s in species] # if s != args.specie]
encoder = models.__dict__[args.encoder](args.bottleneck // (args.nMel//32 * 4), (args.nMel//32, 4))
decoder = models.sparrow_decoder(args.bottleneck, (args.nMel//32, 4))
model = torch.nn.Sequential(encoder, decoder).to(gpu)
print('Go for model '+modelname)
optimizer = torch.optim.AdamW(model.parameters(), weight_decay=0, lr=args.lr, betas=(0.8, 0.999))
# Exponential decay: lr multiplier (1 - lr_decay)**epoch.
scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lambda epoch : (1-args.lr_decay)**epoch)
# One DataLoader per species so every step can mix all species in one batch.
loaders = [torch.utils.data.DataLoader(u.Dataset(pd.read_csv(f'{s}/{s}.csv'), f'{s}/audio/', models.meta[s]['sr'], models.meta[s]['sampleDur']),\
            batch_size=20, shuffle=True, num_workers=8, prefetch_factor=8, collate_fn=u.collate_fn) for s in species] # if s != args.specie]
iterators = [iter(l) for l in loaders]
MSE = torch.nn.MSELoss()
for step in tqdm(range(15_000)):
    batch = []
    # Draw one mini-batch per species; restart a species' iterator when exhausted.
    for i in range(len(species)): #-1):
        try:
            x, name = next(iterators[i])
        except (StopIteration):
            iterators[i] = iter(loaders[i])
            x, name = next(iterators[i])
        batch.append(frontends[i](x.to(gpu)))
    # 'label' is the reconstruction target: all species' spectrograms stacked.
    label = torch.vstack(batch)
    optimizer.zero_grad()
    x = encoder(label)
    pred = decoder(x)
    assert not torch.isnan(pred).any(), "found a NaN :'("
    # Perceptual loss: compare VGG16 features of prediction vs target
    # (single-channel spectrograms expanded to 3 channels for VGG).
    predd = vgg16(pred.expand(pred.shape[0], 3, *pred.shape[2:]))
    labell = vgg16(label.expand(label.shape[0], 3, *label.shape[2:]))
    score = MSE(predd, labell)
    score.backward()
    optimizer.step()
    writer.add_scalar('loss', score.item(), step)
    # NOTE(review): reassigning the loop variable only shifts the %50 check
    # by one for this iteration; it is overwritten by the next 'for' pass.
    # Confirm the off-by-one logging cadence is intended.
    step += 1
    if step % 50 == 0:
        # Plot reconstructions
        # Min-max normalise a subsample of targets for display.
        images = [(e-e.min())/(e.max()-e.min()) for e in label[:160:20]]
        grid = make_grid(images)
        writer.add_image('target', grid, step)
        # writer.add_embedding(x.detach(), global_step=step, label_img=label)
        images = [(e-e.min())/(e.max()-e.min()) for e in pred[:160:20]]
        grid = make_grid(images)
        writer.add_image('reconstruct', grid, step)
        # Checkpoint every 50 steps alongside the image logs.
        torch.save(model.state_dict(), f'{args.specie}/weights/{modelname}')
    scheduler.step()  # decays the lr once per step (lambda's 'epoch' counts steps here)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment