Added new on the fly corr computation

ad9ca72d · ferrari · ccccf9c6 · ad9ca72d · ad9ca72d · ad9ca72d
Commit ad9ca72d authored 3 years ago by ferrari
--- a/README.md
+++ b/README.md
@@ -23,6 +23,8 @@ usage: gsrp_tdoa_hyperres.py [-h] [-c CHANNELS] [-i INVERSE] [-f FRAME_SIZE]
 Arguments
 -----
 ```
+Computes TDOA estimates from a multi-channel recording.
+
 positional arguments:
  infile                The sound file to process.
  outfile               The text or npy file to write results to. Each row
@@ -34,6 +36,8 @@ positional arguments:

 optional arguments:
  -h, --help            show this help message and exit
+
+Channels:
  -c CHANNELS, --channels CHANNELS
                        The channels to cross-correlate. Accepts two or more,
                        but beware of high memory use. To be given as a comma-
@@ -43,15 +47,23 @@ optional arguments:
                        Inverse the channel. To be given as a comma-separated
                        list of numbers,with 0 referring to the first channel
                        once channels have been chosen by --channels.
+
+Size settings:
  -f FRAME_SIZE, --frame-size FRAME_SIZE
                        The size of the cross-correlation frames in seconds
                        (default: 0.2)
-  -s HOP_SIZE, --hop-size HOP_SIZE
+  -s STRIDE, --stride STRIDE
                        The step between the beginnings of sequential frames
                        in seconds (default: 0.01), or the postion in second
                        if csv file path is given.
  -m MAX_TDOA, --max-tdoa MAX_TDOA
                        The maximum TDOA in seconds (default: 0.0011).
+  -S SECONDS, --start SECONDS
+                        If given, only analyze from the given position.
+  -E SECONDS, --end SECONDS
+                        If given, only analyze up to the given position.
+
+Filtering:
  -l LOW, --low LOW     Bottom cutoff frequency. Disabled by default.
  -u UP, --up UP        Top cutoff frequency. Disabled by default.
  -d DECIMATE, --decimate DECIMATE
@@ -59,8 +71,23 @@ optional arguments:
                        default
  -t, --temporal        If given, any decimation will be applied in the time
                        domain instead of the spectral domain.
-  -e, --erase           Erase existing outfile. If outfile existe and --erase
+
+Other:
+  -e, --erase           Erase existing outfile. If outfile exist and --erase
                        is not provide, the script will exit.
-  --start SECONDS       If given, only analyze from the given position.
-  --end SECONDS         If given, only analyze up to the given position.
+  -n, --no-hyperres     Disable the hyper resolution evalutation of the TDOA
+  -M {estimate,auto,on-the-fly,prepare}, --mode {estimate,auto,on-the-fly,prepare}
+                        How to explore the TDOA space (default: prepare).
+                        'prepare' precomputes all the possible TDOA
+                        pairs and then evaluate them. All the results are save
+                        in memory.
+                        'on-the-fly' compute the TDOA pairs at the same
+                        time as it compute the loss function. Only the maximum
+                        is saved. Can be slower than 'prepare'.
+                        'estimate' try to reduce the number of TDOA to
+                        evaluate.
+                        'auto' automatically try to pick the right
+                        method.
+
+Process finished with exit code 0
 ```
--- a/c_corr.pyx
+++ b/c_corr.pyx
@@ -20,7 +20,7 @@ ctypedef fused npType2:

 @cython.boundscheck(False)
 @cython.wraparound(False)
-def c_corr(npType1[:,::1] cc, npType2[::1, :] taus):
+def c_corr_at(npType1[:,::1] cc, npType2[::1, :] taus):
    total = taus.shape[1]
    n_pair = taus.shape[0]
    cdef double max_corr_val = 0.0
@@ -34,3 +34,47 @@ def c_corr(npType1[:,::1] cc, npType2[::1, :] taus):
            max_corr_val = corr_val
            index = i
    return np.log10(max_corr_val), index
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def c_corr_all(npType1[:,::1] cc, npType2 max_tdoa, npType2 base_tdoa):
+    cdef double corr_val = 0.0
+    taus = np.zeros(cc.shape[0], int)
+    out_taus = np.zeros(cc.shape[0], int)
+    _c_corr_core(cc, taus, out_taus,  corr_val, base_tdoa-1, base_tdoa, max_tdoa, cc.shape[1], cc.shape[0])
+    return np.log10(corr_val), out_taus
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def _c_corr_core(npType1[:,::1] cc, npType2[::1] taus, npType2[::1] out_taus, double val,
+                 npType2 curr_tdoa, npType2 ind_tdoa, npType2 max_tdoa, npType2 win_size, npType2 n_pair):
+    if curr_tdoa < 0:
+        cdef int i0 = 1
+        cdef int j0 = 0
+        for i in range(ind_tdoa, n_pair):
+            taus[i] = taus[i0] - taus[j0]
+            i0 += 1
+            if i0 >= ind_tdoa:
+                j0 += 1
+                i0 = j0 + 1
+        val = cc[0, taus[0]]
+        for i in range(1, n_pair):
+            val *= cc[i, taus[i] % win_size]
+        return val
+    cdef int low = -max_tdoa
+    cdef int high = max_tdoa
+    cdef double tmp_val = 0
+    for i in range(curr_tdoa+1, ind_tdoa):
+        if low < taus[i] - max_tdoa:
+            low = -max_tdoa + taus[i]
+        if high > max_tdoa + taus[i]:
+            high = max_tdoa + taus[i]
+    for t in range(low, high+1):
+        taus[curr_tdoa] = t
+        tmp_val = _c_corr_core(cc, taus, out_taus, val, curr_tdoa-1, ind_tdoa, max_size, win_size, n_pair)
+        if val < tmp_val:
+            val = tmp_val
+            out_taus[:] = taus
+    return val
\ No newline at end of file
--- a/gsrp_tdoa_hyperres.py
+++ b/gsrp_tdoa_hyperres.py
@@ -9,8 +9,9 @@ import numpy as np
 from numpy.fft import rfft, irfft
 import scipy.signal as sg
 import soundfile as sf
-from c_corr import c_corr
+import c_corr
 from scipy.signal.windows import tukey
+
 try:
    from tqdm import trange
 except ImportError:
@@ -41,18 +42,13 @@ def slicer(down, up, ndim, n):
    return slices[index].reshape(ndim, -1).T


-def corr(data, sr, pos, w_size, max_tdoa, decimate=1):
+def corr(data, pos, w_size, max_tdoa, decimate=1, mode='prepare', hyper=True):
    num_channels = data.shape[1]
    num_channel_pairs = num_channels * (num_channels - 1) // 2

-    # apply zero-padding
-    data = np.pad(data, (((w_size - 1) // 2, (w_size - 1) // 2), (0, 0)),
-                  'constant')
-
-    # prepare window
-    win = np.expand_dims(tukey(w_size, 0.2), -1)
-
-    # prepare index matrices
+    data = np.pad(data, [((w_size - 1) // 2, (w_size - 1) // 2), (0, 0)], 'constant')
+    win = tukey(w_size, 0.2)[:, np.newaxis]
+    cc_size = min(w_size, int(2 * max_tdoa // decimate))
    v1 = np.empty(num_channel_pairs, np.int8)
    v2 = np.empty(num_channel_pairs, np.int8)
    mat = np.zeros((num_channel_pairs, num_channels - 1), np.int8)
@@ -62,7 +58,7 @@ def corr(data, sr, pos, w_size, max_tdoa, decimate=1):
        mat[k, j - 1] = 1
        v1[k] = i
        v2[k] = j
-    cc_size = min(w_size, int(2 * max_tdoa // decimate))
+    if mode == 'prepare':
        slices = slicer(-(cc_size // 2), cc_size // 2, (num_channels - 1), 16)
        tausf = []
        for j in range(len(slices)):
@@ -72,15 +68,36 @@ def corr(data, sr, pos, w_size, max_tdoa, decimate=1):
        tausf = np.hstack(tausf)
        dw_size = w_size // decimate
        tausf %= dw_size
+    elif mode == 'on-the-fly':
+        pass
+    else:
+        raise ValueError(f'Unknown mode {mode}')

-    # run the TDOA search
    tdoas = np.zeros((len(pos), num_channels), np.float32)
-    tdoas2 = np.zeros((len(pos), num_channels -1), np.float32)
-    cc = np.empty((num_channel_pairs, dw_size), np.float32)
+
+    if hyper:   # prepare hyper res
+        tdoas2 = np.zeros((len(pos), num_channels), np.float32)
        poly = PolynomialFeatures(2)
        lin = LinearRegression()
        pipe = Pipeline([('poly', poly), ('lin', lin)])
        ind = np.triu_indices(num_channels - 1)
+
+        def _hyperres(taus, cc):
+            taus = taus + np.stack(np.meshgrid(*(num_channels - 1) * (np.arange(-2, 3),)), 0).reshape(
+                num_channels - 1, -1).T
+            taus = np.matmul(mat, taus.T)
+            taus = taus[:, np.abs(taus).max(0) <= cc_size // 2]
+            mean = taus.mean(-1)[:3]
+            coef = pipe.fit(taus.T[:, :3] - mean,
+                            cc[np.expand_dims(np.arange(num_channel_pairs), 1), taus.astype(np.int)].prod(0)
+                            ).named_steps['lin'].coef_
+            der = np.zeros((num_channels - 1, num_channels - 1))
+            der[ind] = coef[4:]
+            poly_min = np.linalg.lstsq(der + der.T, -coef[1:4], rcond=None)[0] + mean
+            return pipe.predict(poly_min), poly_min
+
+
+    cc = np.empty((num_channel_pairs, dw_size), np.float32)
    for i in trange(len(pos)):
        fft = rfft(win * data[pos[i]:w_size + pos[i]], axis=0)
        if decimate > 1:
@@ -88,22 +105,24 @@ def corr(data, sr, pos, w_size, max_tdoa, decimate=1):
        fft = np.asarray(fft, dtype=np.complex64)
        cc[:] = irfft(fft[:, v2] * np.conj(fft[:, v1]), axis=0).T
        cc -= cc.min(-1, keepdims=True)
-        val, index = c_corr(cc, tausf)
-        tdoas[i, 0] = val
+        if mode == 'prepare':
+            tdoas[i, 0], index = c_corr.c_corr_at(cc, tausf)
            tdoas[i, 1:] = ((tausf[:(num_channels - 1), index] + dw_size // 2) % dw_size) - dw_size // 2
+        elif mode == 'on-the-fly':
+            tdoas[i, 0], tdoas[i, 1:] = c_corr.c_corr_all(cc, cc_size//2, num_channels - 1)
+        else:
+            raise ValueError(f'Unknown mode {mode}')

-        # hyper resolution
-        taus = tdoas[i,1:num_channels] + np.stack(np.meshgrid(*(num_channels-1) * (np.arange(-2,3),)),0).reshape(num_channels-1, -1).T
-        taus = np.matmul(mat, taus.T)
-        taus = taus[:, np.abs(taus).max(0) <= cc_size // 2]
-        mean = taus.mean(-1)[:3]
-        coef = pipe.fit((taus).T[:,:3] - mean,
-                        cc[np.expand_dims(np.arange(num_channel_pairs), 1), taus.astype(np.int)].prod(0)
-                        ).named_steps['lin'].coef_
-        der = np.zeros((num_channels -1, num_channels - 1))
-        der[ind] = coef[4:]
-        tdoas2[i] = np.linalg.lstsq(der + der.T, -coef[1:4], rcond=None)[0] + mean
+        if hyper:
+            tdoas[i, 0], tdoas2[i, 1:] = _hyperres(tdoas[i, 1:], cc)
+
+    if hyper:
        return np.hstack((np.expand_dims(pos, -1), tdoas)), np.hstack((np.expand_dims(pos, -1), tdoas2))
+    else:
+        return np.hstack((np.expand_dims(pos, -1), tdoas))
+
+
+


 def main(args):
@@ -114,7 +133,8 @@ def main(args):
    # load audio file
    print(f'Loading {args.infile}...')
    sr = sf.info(args.infile).samplerate
-    sound, sr = sf.read(args.infile, start=int(sr*args.start), stop=int(sr*args.end) if args.end is not None else None,
+    sound, sr = sf.read(args.infile, start=int(sr * args.start),
+                        stop=int(sr * args.end) if args.end is not None else None,
                        dtype=np.float32, always_2d=True)
    if args.channels is not None:
        sound = sound[:, args.channels]
@@ -144,20 +164,17 @@ def main(args):
        sr /= args.decimate

    # Position where the TDOAs are computed
-    if os.path.isfile(args.hop_size):
-        pos = (sr * np.loadtxt(args.hop_size, delimiter=',')).astype(int).ravel()
+    if os.path.isfile(args.stride):
+        pos = (sr * np.loadtxt(args.stride, delimiter=',')).astype(int).ravel()
    else:
        try:
-            pos = np.arange(0, len(sound), int(sr * float(args.hop_size)))
+            pos = np.arange(0, len(sound), int(sr * float(args.stride)))
        except ValueError:
-            raise ValueError(f'Error: hop size {args.hop_size} is neither an existing file nor a float')
-
+            raise ValueError(f'Error: hop size {args.stride} is neither an existing file nor a float')

    print("Computing TDOAs...")
-    result1, result2 = corr(sound, sr, pos, int(sr * args.frame_size),
-                  max_tdoa=int(np.ceil(sr * args.max_tdoa)),
-                  decimate=(args.decimate
-                            if not args.temporal else 1))
+    result1 = corr(sound, pos, int(sr * args.frame_size), max_tdoa=int(np.ceil(sr * args.max_tdoa)),
+                   decimate=args.decimate if not args.temporal else 1, mode=args.mode, hyper=not args.no_hyperes)

    # compute additional non-independent TDOAs
    additional1 = [(b - a) for a, b in itertools.combinations(result1.T[2:], 2)]
@@ -176,7 +193,21 @@ def main(args):


 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description='Computes TDOA estimates from a multi-channel recording.')
+    class SmartFormatter(argparse.HelpFormatter):
+        """
+        Allow to change argparse formating behaviour for one option by adding \'R|\' at the start of the help
+        """
+
+        def _split_lines(self, text, width):
+            if text.startswith('R|'):
+                return [l for t in text[2:].splitlines() for l in argparse.HelpFormatter._split_lines(self, t, width)]
+                # this is the RawTextHelpFormatter._split_lines
+            return argparse.HelpFormatter._split_lines(self, text, width)
+
+
+    parser = argparse.ArgumentParser(description='Computes TDOA estimates from a multi-channel recording.',
+                                     formatter_class=SmartFormatter)
+
    parser.add_argument('infile', type=str, help='The sound file to process.')
    parser.add_argument('outfile', type=str, help='The text or npy file to write results to. Each row gives the '
                                                  'position (in samples), cross-correlation product, the independent '
@@ -184,38 +215,59 @@ if __name__ == "__main__":
                                                  'For plots, the panes give the cross-correlation product, '
                                                  'independent TDOAS and derived TDOAs from top to bottom.')

-    parser.add_argument('-c', '--channels', type=intlist, default=None,
+    group1 = parser.add_argument_group('Channels')
+    group1.add_argument('-c', '--channels', type=intlist, default=None,
                        help='The channels to cross-correlate. Accepts two or more,  but beware of high memory use. To '
                             'be given as a comma-separated list of numbers, with 0 referring to the first channel '
                             '(default: all channels).')
-    parser.add_argument('-i', '--inverse', type=intlist, default=None,
+    group1.add_argument('-i', '--inverse', type=intlist, default=None,
                        help='Inverse the channel. To be given as a comma-separated list of numbers,'
                             'with 0 referring to the first channel once channels have been chosen by --channels.')

-    parser.add_argument('-f', '--frame-size', type=float, default=0.2,
+    group2 = parser.add_argument_group('Size settings')
+    group2.add_argument('-f', '--frame-size', type=float, default=0.2,
                        help='The size of the cross-correlation frames in seconds  (default: %(default)s)')
-    parser.add_argument('-s', '--hop-size', type=str, default=0.01,
+    group2.add_argument('-s', '--stride', type=str, default=0.01,
                        help='The step between the beginnings of sequential frames  in seconds (default: %(default)s), '
                             'or the postion in second if csv file path is given.')
-    parser.add_argument('-m', '--max-tdoa', type=float, default=0.0011,
+    group2.add_argument('-m', '--max-tdoa', type=float, default=0.0011,
                        help='The maximum TDOA in seconds (default: %(default)s).')

-    parser.add_argument('-l', '--low', type=float, default=None, help='Bottom cutoff frequency. Disabled by default.')
-    parser.add_argument('-u', '--up', type=float, default=None, help='Top cutoff frequency. Disabled by default.')
+    group2.add_argument('-S', '--start', metavar='SECONDS', type=float, default=0,
+                        help='If given, only analyze from the given position.')
+    group2.add_argument('-E', '--end', metavar='SECONDS', type=float, default=None,
+                        help='If given, only analyze up to the given position.')

-    parser.add_argument('-d', '--decimate', type=int, default=1, help='Downsample the signal by the given factor. '
+    group3 = parser.add_argument_group('Filtering')
+    group3.add_argument('-l', '--low', type=float, default=None, help='Bottom cutoff frequency. Disabled by default.')
+    group3.add_argument('-u', '--up', type=float, default=None, help='Top cutoff frequency. Disabled by default.')
+    group3.add_argument('-d', '--decimate', type=int, default=1, help='Downsample the signal by the given factor. '
                                                                      'Disabled by default')
-    parser.add_argument('-t', '--temporal', action='store_true', help='If given, any decimation will be applied in the '
+    group3.add_argument('-t', '--temporal', action='store_true', help='If given, any decimation will be applied in the '
                                                                      'time domain instead of the spectral domain.')

-    parser.add_argument('-e', '--erase', action='store_false', help='Erase existing outfile. If outfile existe and '
+    group4 = parser.add_argument_group('Other')
+    group4.add_argument('-e', '--erase', action='store_false', help='Erase existing outfile. If outfile exist and '
                                                                    '--erase is not provide, the script will exit.')
-
-    parser.add_argument('--start', metavar='SECONDS', type=float, default=0,
-                        help='If given, only analyze from the given position.')
-    parser.add_argument('--end', metavar='SECONDS', type=float, default=None,
-                        help='If given, only analyze up to the given position.')
+    group4.add_argument('-n', '--no-hyperres', action='store_True', help='Disable the hyper resolution evalutation of '
+                                                                          'the TDOA')
+    group4.add_argument('-M', '--mode', choices={'prepare', 'on-the-fly', 'estimate', 'auto'}, default='prepare',
+                        help=f'R|How to explore the TDOA space (default: %(default)s).\n'
+                             f'{BColors.BOLD}prepare{BColors.ENDC} precomputes all the possible TDOA pairs and then '
+                             f'evaluate them. All the results are save in memory.\n'
+                             f'{BColors.BOLD}on-the-fly{BColors.ENDC} compute the TDOA pairs at the same time as it '
+                             f'compute the loss function. Only the maximum is saved. Can be slower than '
+                             f'{BColors.BOLD}prepare{BColors.ENDC}.\n'
+                             f'{BColors.BOLD}estimate{BColors.ENDC} try to reduce the number of TDOA to evaluate.\n'
+                             f'{BColors.BOLD}auto{BColors.ENDC} automatically try to pick the right method.')

    args = parser.parse_args()
+    try:
+        if args.mode in ['estimate', 'auto']:
+            raise NotImplementedError(f'mode {args.mode} is not yet implemented')

        sys.exit(main(args))
+
+    except Exception as e:
+        print(e)
+        sys.exit(2)