Skip to content
Snippets Groups Projects
Commit ad9ca72d authored by ferrari's avatar ferrari
Browse files

Added new on the fly corr computation

parent ccccf9c6
No related branches found
No related tags found
No related merge requests found
......@@ -23,6 +23,8 @@ usage: gsrp_tdoa_hyperres.py [-h] [-c CHANNELS] [-i INVERSE] [-f FRAME_SIZE]
Arguments
-----
```
Computes TDOA estimates from a multi-channel recording.
positional arguments:
infile The sound file to process.
outfile The text or npy file to write results to. Each row
......@@ -34,6 +36,8 @@ positional arguments:
optional arguments:
-h, --help show this help message and exit
Channels:
-c CHANNELS, --channels CHANNELS
The channels to cross-correlate. Accepts two or more,
but beware of high memory use. To be given as a comma-
......@@ -43,15 +47,23 @@ optional arguments:
Inverse the channel. To be given as a comma-separated
list of numbers,with 0 referring to the first channel
once channels have been chosen by --channels.
Size settings:
-f FRAME_SIZE, --frame-size FRAME_SIZE
The size of the cross-correlation frames in seconds
(default: 0.2)
-s HOP_SIZE, --hop-size HOP_SIZE
-s STRIDE, --stride STRIDE
The step between the beginnings of sequential frames
in seconds (default: 0.01), or the postion in second
if csv file path is given.
-m MAX_TDOA, --max-tdoa MAX_TDOA
The maximum TDOA in seconds (default: 0.0011).
-S SECONDS, --start SECONDS
If given, only analyze from the given position.
-E SECONDS, --end SECONDS
If given, only analyze up to the given position.
Filtering:
-l LOW, --low LOW Bottom cutoff frequency. Disabled by default.
-u UP, --up UP Top cutoff frequency. Disabled by default.
-d DECIMATE, --decimate DECIMATE
......@@ -59,8 +71,23 @@ optional arguments:
default
-t, --temporal If given, any decimation will be applied in the time
domain instead of the spectral domain.
-e, --erase Erase existing outfile. If outfile existe and --erase
Other:
-e, --erase Erase existing outfile. If outfile exist and --erase
is not provide, the script will exit.
--start SECONDS If given, only analyze from the given position.
--end SECONDS If given, only analyze up to the given position.
-n, --no-hyperres Disable the hyper resolution evalutation of the TDOA
-M {estimate,auto,on-the-fly,prepare}, --mode {estimate,auto,on-the-fly,prepare}
How to explore the TDOA space (default: prepare).
'prepare' precomputes all the possible TDOA
pairs and then evaluate them. All the results are save
in memory.
'on-the-fly' compute the TDOA pairs at the same
time as it compute the loss function. Only the maximum
is saved. Can be slower than 'prepare'.
'estimate' try to reduce the number of TDOA to
evaluate.
'auto' automatically try to pick the right
method.
Process finished with exit code 0
```
......@@ -20,7 +20,7 @@ ctypedef fused npType2:
@cython.boundscheck(False)
@cython.wraparound(False)
def c_corr(npType1[:,::1] cc, npType2[::1, :] taus):
def c_corr_at(npType1[:,::1] cc, npType2[::1, :] taus):
total = taus.shape[1]
n_pair = taus.shape[0]
cdef double max_corr_val = 0.0
......@@ -34,3 +34,47 @@ def c_corr(npType1[:,::1] cc, npType2[::1, :] taus):
max_corr_val = corr_val
index = i
return np.log10(max_corr_val), index
@cython.boundscheck(False)
@cython.wraparound(False)
def c_corr_all(npType1[:,::1] cc, npType2 max_tdoa, npType2 base_tdoa):
cdef double corr_val = 0.0
taus = np.zeros(cc.shape[0], int)
out_taus = np.zeros(cc.shape[0], int)
_c_corr_core(cc, taus, out_taus, corr_val, base_tdoa-1, base_tdoa, max_tdoa, cc.shape[1], cc.shape[0])
return np.log10(corr_val), out_taus
@cython.boundscheck(False)
@cython.wraparound(False)
def _c_corr_core(npType1[:,::1] cc, npType2[::1] taus, npType2[::1] out_taus, double val,
npType2 curr_tdoa, npType2 ind_tdoa, npType2 max_tdoa, npType2 win_size, npType2 n_pair):
if curr_tdoa < 0:
cdef int i0 = 1
cdef int j0 = 0
for i in range(ind_tdoa, n_pair):
taus[i] = taus[i0] - taus[j0]
i0 += 1
if i0 >= ind_tdoa:
j0 += 1
i0 = j0 + 1
val = cc[0, taus[0]]
for i in range(1, n_pair):
val *= cc[i, taus[i] % win_size]
return val
cdef int low = -max_tdoa
cdef int high = max_tdoa
cdef double tmp_val = 0
for i in range(curr_tdoa+1, ind_tdoa):
if low < taus[i] - max_tdoa:
low = -max_tdoa + taus[i]
if high > max_tdoa + taus[i]:
high = max_tdoa + taus[i]
for t in range(low, high+1):
taus[curr_tdoa] = t
tmp_val = _c_corr_core(cc, taus, out_taus, val, curr_tdoa-1, ind_tdoa, max_size, win_size, n_pair)
if val < tmp_val:
val = tmp_val
out_taus[:] = taus
return val
\ No newline at end of file
......@@ -9,8 +9,9 @@ import numpy as np
from numpy.fft import rfft, irfft
import scipy.signal as sg
import soundfile as sf
from c_corr import c_corr
import c_corr
from scipy.signal.windows import tukey
try:
from tqdm import trange
except ImportError:
......@@ -41,18 +42,13 @@ def slicer(down, up, ndim, n):
return slices[index].reshape(ndim, -1).T
def corr(data, sr, pos, w_size, max_tdoa, decimate=1):
def corr(data, pos, w_size, max_tdoa, decimate=1, mode='prepare', hyper=True):
num_channels = data.shape[1]
num_channel_pairs = num_channels * (num_channels - 1) // 2
# apply zero-padding
data = np.pad(data, (((w_size - 1) // 2, (w_size - 1) // 2), (0, 0)),
'constant')
# prepare window
win = np.expand_dims(tukey(w_size, 0.2), -1)
# prepare index matrices
data = np.pad(data, [((w_size - 1) // 2, (w_size - 1) // 2), (0, 0)], 'constant')
win = tukey(w_size, 0.2)[:, np.newaxis]
cc_size = min(w_size, int(2 * max_tdoa // decimate))
v1 = np.empty(num_channel_pairs, np.int8)
v2 = np.empty(num_channel_pairs, np.int8)
mat = np.zeros((num_channel_pairs, num_channels - 1), np.int8)
......@@ -62,7 +58,7 @@ def corr(data, sr, pos, w_size, max_tdoa, decimate=1):
mat[k, j - 1] = 1
v1[k] = i
v2[k] = j
cc_size = min(w_size, int(2 * max_tdoa // decimate))
if mode == 'prepare':
slices = slicer(-(cc_size // 2), cc_size // 2, (num_channels - 1), 16)
tausf = []
for j in range(len(slices)):
......@@ -72,15 +68,36 @@ def corr(data, sr, pos, w_size, max_tdoa, decimate=1):
tausf = np.hstack(tausf)
dw_size = w_size // decimate
tausf %= dw_size
elif mode == 'on-the-fly':
pass
else:
raise ValueError(f'Unknown mode {mode}')
# run the TDOA search
tdoas = np.zeros((len(pos), num_channels), np.float32)
tdoas2 = np.zeros((len(pos), num_channels -1), np.float32)
cc = np.empty((num_channel_pairs, dw_size), np.float32)
if hyper: # prepare hyper res
tdoas2 = np.zeros((len(pos), num_channels), np.float32)
poly = PolynomialFeatures(2)
lin = LinearRegression()
pipe = Pipeline([('poly', poly), ('lin', lin)])
ind = np.triu_indices(num_channels - 1)
def _hyperres(taus, cc):
taus = taus + np.stack(np.meshgrid(*(num_channels - 1) * (np.arange(-2, 3),)), 0).reshape(
num_channels - 1, -1).T
taus = np.matmul(mat, taus.T)
taus = taus[:, np.abs(taus).max(0) <= cc_size // 2]
mean = taus.mean(-1)[:3]
coef = pipe.fit(taus.T[:, :3] - mean,
cc[np.expand_dims(np.arange(num_channel_pairs), 1), taus.astype(np.int)].prod(0)
).named_steps['lin'].coef_
der = np.zeros((num_channels - 1, num_channels - 1))
der[ind] = coef[4:]
poly_min = np.linalg.lstsq(der + der.T, -coef[1:4], rcond=None)[0] + mean
return pipe.predict(poly_min), poly_min
cc = np.empty((num_channel_pairs, dw_size), np.float32)
for i in trange(len(pos)):
fft = rfft(win * data[pos[i]:w_size + pos[i]], axis=0)
if decimate > 1:
......@@ -88,22 +105,24 @@ def corr(data, sr, pos, w_size, max_tdoa, decimate=1):
fft = np.asarray(fft, dtype=np.complex64)
cc[:] = irfft(fft[:, v2] * np.conj(fft[:, v1]), axis=0).T
cc -= cc.min(-1, keepdims=True)
val, index = c_corr(cc, tausf)
tdoas[i, 0] = val
if mode == 'prepare':
tdoas[i, 0], index = c_corr.c_corr_at(cc, tausf)
tdoas[i, 1:] = ((tausf[:(num_channels - 1), index] + dw_size // 2) % dw_size) - dw_size // 2
elif mode == 'on-the-fly':
tdoas[i, 0], tdoas[i, 1:] = c_corr.c_corr_all(cc, cc_size//2, num_channels - 1)
else:
raise ValueError(f'Unknown mode {mode}')
# hyper resolution
taus = tdoas[i,1:num_channels] + np.stack(np.meshgrid(*(num_channels-1) * (np.arange(-2,3),)),0).reshape(num_channels-1, -1).T
taus = np.matmul(mat, taus.T)
taus = taus[:, np.abs(taus).max(0) <= cc_size // 2]
mean = taus.mean(-1)[:3]
coef = pipe.fit((taus).T[:,:3] - mean,
cc[np.expand_dims(np.arange(num_channel_pairs), 1), taus.astype(np.int)].prod(0)
).named_steps['lin'].coef_
der = np.zeros((num_channels -1, num_channels - 1))
der[ind] = coef[4:]
tdoas2[i] = np.linalg.lstsq(der + der.T, -coef[1:4], rcond=None)[0] + mean
if hyper:
tdoas[i, 0], tdoas2[i, 1:] = _hyperres(tdoas[i, 1:], cc)
if hyper:
return np.hstack((np.expand_dims(pos, -1), tdoas)), np.hstack((np.expand_dims(pos, -1), tdoas2))
else:
return np.hstack((np.expand_dims(pos, -1), tdoas))
def main(args):
......@@ -114,7 +133,8 @@ def main(args):
# load audio file
print(f'Loading {args.infile}...')
sr = sf.info(args.infile).samplerate
sound, sr = sf.read(args.infile, start=int(sr*args.start), stop=int(sr*args.end) if args.end is not None else None,
sound, sr = sf.read(args.infile, start=int(sr * args.start),
stop=int(sr * args.end) if args.end is not None else None,
dtype=np.float32, always_2d=True)
if args.channels is not None:
sound = sound[:, args.channels]
......@@ -144,20 +164,17 @@ def main(args):
sr /= args.decimate
# Position where the TDOAs are computed
if os.path.isfile(args.hop_size):
pos = (sr * np.loadtxt(args.hop_size, delimiter=',')).astype(int).ravel()
if os.path.isfile(args.stride):
pos = (sr * np.loadtxt(args.stride, delimiter=',')).astype(int).ravel()
else:
try:
pos = np.arange(0, len(sound), int(sr * float(args.hop_size)))
pos = np.arange(0, len(sound), int(sr * float(args.stride)))
except ValueError:
raise ValueError(f'Error: hop size {args.hop_size} is neither an existing file nor a float')
raise ValueError(f'Error: hop size {args.stride} is neither an existing file nor a float')
print("Computing TDOAs...")
result1, result2 = corr(sound, sr, pos, int(sr * args.frame_size),
max_tdoa=int(np.ceil(sr * args.max_tdoa)),
decimate=(args.decimate
if not args.temporal else 1))
result1 = corr(sound, pos, int(sr * args.frame_size), max_tdoa=int(np.ceil(sr * args.max_tdoa)),
decimate=args.decimate if not args.temporal else 1, mode=args.mode, hyper=not args.no_hyperes)
# compute additional non-independent TDOAs
additional1 = [(b - a) for a, b in itertools.combinations(result1.T[2:], 2)]
......@@ -176,7 +193,21 @@ def main(args):
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Computes TDOA estimates from a multi-channel recording.')
class SmartFormatter(argparse.HelpFormatter):
"""
Allow to change argparse formating behaviour for one option by adding \'R|\' at the start of the help
"""
def _split_lines(self, text, width):
if text.startswith('R|'):
return [l for t in text[2:].splitlines() for l in argparse.HelpFormatter._split_lines(self, t, width)]
# this is the RawTextHelpFormatter._split_lines
return argparse.HelpFormatter._split_lines(self, text, width)
parser = argparse.ArgumentParser(description='Computes TDOA estimates from a multi-channel recording.',
formatter_class=SmartFormatter)
parser.add_argument('infile', type=str, help='The sound file to process.')
parser.add_argument('outfile', type=str, help='The text or npy file to write results to. Each row gives the '
'position (in samples), cross-correlation product, the independent '
......@@ -184,38 +215,59 @@ if __name__ == "__main__":
'For plots, the panes give the cross-correlation product, '
'independent TDOAS and derived TDOAs from top to bottom.')
parser.add_argument('-c', '--channels', type=intlist, default=None,
group1 = parser.add_argument_group('Channels')
group1.add_argument('-c', '--channels', type=intlist, default=None,
help='The channels to cross-correlate. Accepts two or more, but beware of high memory use. To '
'be given as a comma-separated list of numbers, with 0 referring to the first channel '
'(default: all channels).')
parser.add_argument('-i', '--inverse', type=intlist, default=None,
group1.add_argument('-i', '--inverse', type=intlist, default=None,
help='Inverse the channel. To be given as a comma-separated list of numbers,'
'with 0 referring to the first channel once channels have been chosen by --channels.')
parser.add_argument('-f', '--frame-size', type=float, default=0.2,
group2 = parser.add_argument_group('Size settings')
group2.add_argument('-f', '--frame-size', type=float, default=0.2,
help='The size of the cross-correlation frames in seconds (default: %(default)s)')
parser.add_argument('-s', '--hop-size', type=str, default=0.01,
group2.add_argument('-s', '--stride', type=str, default=0.01,
help='The step between the beginnings of sequential frames in seconds (default: %(default)s), '
'or the postion in second if csv file path is given.')
parser.add_argument('-m', '--max-tdoa', type=float, default=0.0011,
group2.add_argument('-m', '--max-tdoa', type=float, default=0.0011,
help='The maximum TDOA in seconds (default: %(default)s).')
parser.add_argument('-l', '--low', type=float, default=None, help='Bottom cutoff frequency. Disabled by default.')
parser.add_argument('-u', '--up', type=float, default=None, help='Top cutoff frequency. Disabled by default.')
group2.add_argument('-S', '--start', metavar='SECONDS', type=float, default=0,
help='If given, only analyze from the given position.')
group2.add_argument('-E', '--end', metavar='SECONDS', type=float, default=None,
help='If given, only analyze up to the given position.')
parser.add_argument('-d', '--decimate', type=int, default=1, help='Downsample the signal by the given factor. '
group3 = parser.add_argument_group('Filtering')
group3.add_argument('-l', '--low', type=float, default=None, help='Bottom cutoff frequency. Disabled by default.')
group3.add_argument('-u', '--up', type=float, default=None, help='Top cutoff frequency. Disabled by default.')
group3.add_argument('-d', '--decimate', type=int, default=1, help='Downsample the signal by the given factor. '
'Disabled by default')
parser.add_argument('-t', '--temporal', action='store_true', help='If given, any decimation will be applied in the '
group3.add_argument('-t', '--temporal', action='store_true', help='If given, any decimation will be applied in the '
'time domain instead of the spectral domain.')
parser.add_argument('-e', '--erase', action='store_false', help='Erase existing outfile. If outfile existe and '
group4 = parser.add_argument_group('Other')
group4.add_argument('-e', '--erase', action='store_false', help='Erase existing outfile. If outfile exist and '
'--erase is not provide, the script will exit.')
parser.add_argument('--start', metavar='SECONDS', type=float, default=0,
help='If given, only analyze from the given position.')
parser.add_argument('--end', metavar='SECONDS', type=float, default=None,
help='If given, only analyze up to the given position.')
group4.add_argument('-n', '--no-hyperres', action='store_True', help='Disable the hyper resolution evalutation of '
'the TDOA')
group4.add_argument('-M', '--mode', choices={'prepare', 'on-the-fly', 'estimate', 'auto'}, default='prepare',
help=f'R|How to explore the TDOA space (default: %(default)s).\n'
f'{BColors.BOLD}prepare{BColors.ENDC} precomputes all the possible TDOA pairs and then '
f'evaluate them. All the results are save in memory.\n'
f'{BColors.BOLD}on-the-fly{BColors.ENDC} compute the TDOA pairs at the same time as it '
f'compute the loss function. Only the maximum is saved. Can be slower than '
f'{BColors.BOLD}prepare{BColors.ENDC}.\n'
f'{BColors.BOLD}estimate{BColors.ENDC} try to reduce the number of TDOA to evaluate.\n'
f'{BColors.BOLD}auto{BColors.ENDC} automatically try to pick the right method.')
args = parser.parse_args()
try:
if args.mode in ['estimate', 'auto']:
raise NotImplementedError(f'mode {args.mode} is not yet implemented')
sys.exit(main(args))
except Exception as e:
print(e)
sys.exit(2)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment