Add smart method

7b3e656c · ferrari · d26361b0 · 7b3e656c · 7b3e656c · 7b3e656c
Commit 7b3e656c authored 3 years ago by ferrari
--- a/README.md
+++ b/README.md
@@ -76,7 +76,7 @@ Other:
  -e, --erase           Erase existing outfile. If outfile exist and --erase
                        is not provide, the script will exit.
  -n, --no-hyperres     Disable the hyper resolution evalutation of the TDOA
-  -M {estimate,auto,on-the-fly,prepare}, --mode {estimate,auto,on-the-fly,prepare}
+  -M {smart,auto,on-the-fly,prepare}, --mode {smart,auto,on-the-fly,prepare}
                        How to explore the TDOA space (default: prepare).
                        'prepare' precomputes all the possible TDOA
                        pairs and then evaluate them. All the results are save
@@ -84,8 +84,8 @@ Other:
                        'on-the-fly' compute the TDOA pairs at the same
                        time as it compute the loss function. Only the maximum
                        is saved. Can be slower than 'prepare'.
-                        'estimate' try to reduce the number of TDOA to
-                        evaluate.
+                        'smart' gradually increase the search space dimension, '
+                        reducing the number of tdoa to evaluate.
                        'auto' automatically try to pick the right
                        method.


--- a/c_corr.c
+++ b/c_corr.c
--- a/c_corr.pyx
+++ b/c_corr.pyx
@@ -7,7 +7,6 @@ ctypedef fused npType1:
    short
    int
    long
-    long long
    float
    double

@@ -15,11 +14,11 @@ ctypedef fused npType2:
    short
    int
    long
-    long long


 @cython.boundscheck(False)
 @cython.wraparound(False)
+@cython.embedsignature(True)
 def c_corr_at(npType1[:,::1] cc, npType2[::1, :] taus):
    total = taus.shape[1]
    n_pair = taus.shape[0]
@@ -38,34 +37,48 @@ def c_corr_at(npType1[:,::1] cc, npType2[::1, :] taus):

 @cython.boundscheck(False)
 @cython.wraparound(False)
-def c_corr_all(npType1[:,::1] cc, npType2 max_tdoa, npType2 base_tdoa):
+@cython.embedsignature(True)
+cpdef c_corr_all(cc, npType2 max_tdoa, npType2 base_tdoa):
+    cdef float[:,::1] cc_mem = cc.astype(np.float32)
    cdef double corr_val = 0.0
-    taus = np.zeros(cc.shape[0], int)
-    out_taus = np.zeros(cc.shape[0], int)
-    _c_corr_core(cc, taus, out_taus,  corr_val, base_tdoa-1, base_tdoa, max_tdoa, cc.shape[1], cc.shape[0])
+    cdef long[::1] taus = np.zeros(cc.shape[0], int)
+    cdef long[::1] out_taus = np.zeros(cc.shape[0], int)
+    corr_val = _c_corr_core(cc_mem, taus, out_taus,  corr_val, <long> base_tdoa-1, <long> base_tdoa,
+                            <long> max_tdoa, <long> cc.shape[1], <long> cc.shape[0])
    return np.log10(corr_val), out_taus


+cdef long moduloNeg(long a, long b):
+    if a < 0:
+        return a + b
+    else:
+        return a
+
+
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def _c_corr_core(npType1[:,::1] cc, npType2[::1] taus, npType2[::1] out_taus, double val,
-                 npType2 curr_tdoa, npType2 ind_tdoa, npType2 max_tdoa, npType2 win_size, npType2 n_pair):
-    if curr_tdoa < 0:
+@cython.profile(True)
+cdef double _c_corr_core(float[:,::1] cc, long[::1] taus, long[::1] out_taus, double val,
+                 long curr_tdoa, long ind_tdoa, long max_tdoa, long win_size, long n_pair):
    cdef int i0 = 1
    cdef int j0 = 0
+    cdef double tmp_val = 0.0
+    if curr_tdoa < 0:
        for i in range(ind_tdoa, n_pair):
            taus[i] = taus[i0] - taus[j0]
            i0 += 1
            if i0 >= ind_tdoa:
                j0 += 1
                i0 = j0 + 1
-        val = cc[0, taus[0]]
+        tmp_val = cc[0, moduloNeg(taus[0], win_size)]
        for i in range(1, n_pair):
-            val *= cc[i, taus[i] % win_size]
+            tmp_val *= cc[i, moduloNeg(taus[i], win_size)]
+        if val < tmp_val:
+            val = tmp_val
+            out_taus[:] = taus
        return val
    cdef int low = -max_tdoa
    cdef int high = max_tdoa
-    cdef double tmp_val = 0
    for i in range(curr_tdoa+1, ind_tdoa):
        if low < taus[i] - max_tdoa:
            low = -max_tdoa + taus[i]
@@ -73,7 +86,39 @@ def _c_corr_core(npType1[:,::1] cc, npType2[::1] taus, npType2[::1] out_taus, do
            high = max_tdoa + taus[i]
    for t in range(low, high+1):
        taus[curr_tdoa] = t
-        tmp_val = _c_corr_core(cc, taus, out_taus, val, curr_tdoa-1, ind_tdoa, max_size, win_size, n_pair)
+        val = _c_corr_core(cc, taus, out_taus, val, curr_tdoa-1, ind_tdoa, max_tdoa, win_size, n_pair)
+    return val
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+@cython.profile(True)
+def _c_corr_core2(npType1[:,::1] cc, npType2[::1] taus, npType2[::1] out_taus, double val,
+                 npType2 curr_tdoa, npType2 ind_tdoa, npType2 max_tdoa, npType2 win_size, npType2 n_pair):
+    cdef int i0 = 1
+    cdef int j0 = 0
+    cdef double tmp_val = 0.0
+    for i in range(ind_tdoa):
+        taus[i] =  - max_tdoa - 1
+    cdef int level = 0
+    while level >= 0:
+        taus[level] += 1
+        if taus[level] > min(taus[:level]) + max_tdoa:
+            taus[level] = max(taus[:level]) - max_tdoa - 1
+            level -= 1
+            continue
+        level +=1
+        if level == ind_tdoa:
+            level -= 1
+            for i in range(ind_tdoa, n_pair):
+                taus[i] = taus[i0] - taus[j0]
+                i0 += 1
+                if i0 >= ind_tdoa:
+                    j0 += 1
+                    i0 = j0 + 1
+                tmp_val = cc[0, moduloNeg(taus[0], win_size)]
+            for i in range(1, n_pair):
+                tmp_val *= cc[i, moduloNeg(taus[i], win_size)]
            if val < tmp_val:
                val = tmp_val
                out_taus[:] = taus

--- a/cython_setup.py
+++ b/cython_setup.py
@@ -2,5 +2,5 @@ from distutils.core import setup
 from Cython.Build import cythonize

 setup(
-    ext_modules = cythonize("c_corr.pyx", annotate=True)
+    ext_modules = cythonize("c_corr.pyx", annotate=True, compiler_directives={'language_level' : "3"})
 )
\ No newline at end of file
--- a/gsrp_smart_util.py
+++ b/gsrp_smart_util.py
+import numpy as np
+from math import ceil
+
+
+class Operation:
+    __slots__ = 'op', 'left', 'right', 'lifetime'
+
+    def __init__(self, op, left, right, lifetime):
+        self.op = op
+        self.left = left
+        self.right = right
+        self.lifetime = lifetime
+        
+    def __repr__(self):
+        return f'(op: {self.op}, l: {self.left}, r: {self.right}, lt: {self.lifetime})'
+        
+    def __eq__(self, other):
+        if isinstance(other, str):
+            return self.op == other
+        elif isinstance(other, Operation):
+            return self.op == other.op and self.left == other.left and\
+                   self.right == other.right and self.lifetime == other.lifetime
+        return False
+
+
+def gen_tree(size):
+    tree = [list() for _ in range(size)]
+    tree[-1].append(list(range(size)))
+    for i in range(size-2, -1, -1):
+        m_len = i + 1
+        unused = []
+        for group in tree[i+1]:
+            if len(group) > m_len:
+                tree[i].append(group[:-1].copy())
+                unused.append(group[-1])
+            else:
+                tree[i].append(group.copy())
+        for group in tree[i]:
+            le = len(group)
+            if le < m_len:
+                group.extend(unused[:m_len-le])
+                unused = unused[m_len-le:]
+            if not len(unused):
+                break
+        if len(unused):
+            for j in range(ceil(len(unused)/m_len)):
+                tree[i].append(unused[j*m_len:(j+1)*m_len])
+    return tree
+
+
+def op_tree(tree):
+    program = [list() for _ in range(len(tree))]
+    clean_list = [list() for _ in range(len(tree) + 1)]
+    program[0] = [Operation('mem', 0, group[0], len(tree)) for group in tree[0]]
+    for i in range(1, len(tree)):
+        for group in tree[i]:
+            if group[:-1] in tree[i-1]:
+                j = tree[i-1].index(group[:-1])
+                program[i-1][j].lifetime = i
+                program[i].append(Operation('mul', j, tree[0].index(group[-1:]), i))
+            else:
+                for j in range(i-1, -1, -1):
+                    if group in tree[j]:
+                        break
+                else:
+                    raise KeyError(f'{group} not found in:\n{tree}')
+                program[i].append(Operation('mem', j, tree[j].index(group), i))
+    for i, step in enumerate(program):
+        for j, op in enumerate(step):
+            clean_list[op.lifetime].append((i, j))
+    return program, clean_list
+
+
+def dep_tdoa(tij, nind, ntot):
+    i0 = 1
+    j0 = 0
+    for i in range(nind, ntot):
+        tij[i] = tij[i0] - tij[j0]
+        i0 += 1
+        if i0 >= nind:
+            j0 += 1
+            i0 = j0 + 1
+
+
+def num_ind(i, j, nind):
+    return j*(nind-1) + i-1 - (j*(j+1))//2
+
+
+def mul(mem1, mem2, cc, t_max, id1, id2, n_ind):
+    # assume len(id2) == 1
+    idx1, idx2 = np.meshgrid(np.arange(len(mem1[0])), np.arange(len(mem2[1])))
+    idx1, idx2 = idx1.ravel(), idx2.ravel()
+    out_tij = np.concatenate((mem1[1][idx1], mem2[1][idx2]), axis=-1)
+    mask = (np.abs(out_tij[:, :-1] - out_tij[:, -1:]) <= t_max).all(-1)
+    out_tij = out_tij[mask]
+    idx1, idx2 = idx1[mask], idx2[mask]
+    out_val = mem1[0][idx1] * mem2[0][idx2]
+    tij_dep = out_tij[:, :-1] - out_tij[:, -1:]
+    tij_dep *= np.array([1 if i > id2 else -1 for i in id1])
+    ch_dep = np.array([num_ind(i, id2, n_ind) if i > id2 else num_ind(id2, i, n_ind) for i in id1]) + n_ind
+    out_val *= cc[tij_dep, ch_dep].prod(-1)
+    return out_val, out_tij
+
+
+def mask_val(mem, val):
+    mask = mem[0] >= val
+    return mem[0][mask], mem[1][mask]
+
+
+def smart_gsrp(cc, n_ind, n_tot, t_max, tree, program, clean_list):
+    memory = dict()
+    val = 0
+    tij = np.zeros(n_tot, int)
+    for i, step in enumerate(program):
+        # increase dimensions
+        for j, op in enumerate(step):
+            if op == 'mem':
+                if i == 0:
+                    memory[(0, j)] = (np.concatenate((cc[-t_max:, op.right], cc[:t_max + 1, op.right])),
+                                      np.arange(-t_max, t_max + 1).reshape(-1, 1))
+                else:
+                    memory[(i, j)] = mask_val(memory[(op.left, op.right)], val)
+            else:  # op == 'mul'
+                memory[(i, j)] = mul(mask_val(memory[(i-1, op.left)], val), mask_val(memory[(0, op.right)], val),
+                                     cc, t_max, tree[i - 1][op.left], tree[0][op.right][0], n_ind)
+        # find potential maximum
+        for j in range(len(step)):
+            tij[tree[i][j]] = memory[(i, j)][1][np.argmax(memory[(i, j)][0])]
+        dep_tdoa(tij, n_ind, n_tot)
+        val = cc[tij, np.arange(n_tot)].prod()
+        # print('tdoa:', tij, 'val:', val, 'mem size:', (lambda x: f'{x} ({100 * x / (2 * t_max + 1) ** i}%)')(sum(len(o[0]) for o in memory.values())))
+
+        # Mem clean up
+        for p in clean_list[i]:
+            del memory[p]
+
+    return val, tij
+
+
+
+
+
+
--- a/gsrp_tdoa_hyperres.py
+++ b/gsrp_tdoa_hyperres.py
@@ -10,6 +10,8 @@ from numpy.fft import rfft, irfft
 import scipy.signal as sg
 import soundfile as sf
 import c_corr
+from gsrp_smart_util import *
+from math import ceil
 from scipy.signal.windows import tukey

 try:
@@ -58,6 +60,7 @@ def corr(data, pos, w_size, max_tdoa, decimate=1, mode='prepare', hyper=True):
        mat[k, j - 1] = 1
        v1[k] = i
        v2[k] = j
+    dw_size = w_size // decimate
    if mode == 'prepare':
        slices = slicer(-(cc_size // 2), cc_size // 2, (num_channels - 1), 16)
        tausf = []
@@ -66,10 +69,12 @@ def corr(data, pos, w_size, max_tdoa, decimate=1, mode='prepare', hyper=True):
            taus2 = np.matmul(mat, taus)
            tausf += [taus2[:, np.abs(taus2).max(0) <= cc_size // 2]]
        tausf = np.hstack(tausf)
-        dw_size = w_size // decimate
        tausf %= dw_size
    elif mode == 'on-the-fly':
        pass
+    elif mode == 'smart':
+        tree = gen_tree(num_channels - 1)
+        program, clean_list = op_tree(tree)
    else:
        raise ValueError(f'Unknown mode {mode}')

@@ -94,8 +99,7 @@ def corr(data, pos, w_size, max_tdoa, decimate=1, mode='prepare', hyper=True):
            der = np.zeros((num_channels - 1, num_channels - 1))
            der[ind] = coef[4:]
            poly_min = np.linalg.lstsq(der + der.T, -coef[1:4], rcond=None)[0] + mean
-            return pipe.predict(poly_min), poly_min
-
+            return pipe.predict(poly_min[np.newaxis]).item(), poly_min

    cc = np.empty((num_channel_pairs, dw_size), np.float32)
    for i in trange(len(pos)):
@@ -109,7 +113,13 @@ def corr(data, pos, w_size, max_tdoa, decimate=1, mode='prepare', hyper=True):
            tdoas[i, 0], index = c_corr.c_corr_at(cc, tausf)
            tdoas[i, 1:] = ((tausf[:(num_channels - 1), index] + dw_size // 2) % dw_size) - dw_size // 2
        elif mode == 'on-the-fly':
-            tdoas[i, 0], tdoas[i, 1:] = c_corr.c_corr_all(cc, cc_size//2, num_channels - 1)
+            tdoas[i, 0], tij = c_corr.c_corr_all(cc, cc_size//2, num_channels - 1)
+            tdoas[i, 1:] = tij[:(num_channels - 1)]
+        elif mode == 'smart':
+            maxs = cc.max(1, keepdims=True)
+            cc /= maxs
+            val, tij = smart_gsrp(cc.T, num_channels - 1, num_channel_pairs, cc_size // 2, tree, program, clean_list)
+            tdoas[i, 0], tdoas[i, 1:] = np.log10(val * maxs.prod()), tij[:(num_channels - 1)]
        else:
            raise ValueError(f'Unknown mode {mode}')

@@ -122,9 +132,6 @@ def corr(data, pos, w_size, max_tdoa, decimate=1, mode='prepare', hyper=True):
        return np.hstack((np.expand_dims(pos, -1), tdoas))


-
-
-
 def main(args):
    if args.erase and os.path.isfile(args.outfile):
        print(f'{BColors.WARNING}{args.outfile} already exist and erase is not set {BColors.ENDC}')
@@ -218,9 +225,7 @@ if __name__ == "__main__":
    parser.add_argument('infile', type=str, help='The sound file to process.')
    parser.add_argument('outfile', type=str, help='The text or npy file to write results to. Each row gives the '
                                                  'position (in samples), cross-correlation product, the independent '
-                                                  'TDOAs (in samples), and TDOAs derived from the independent ones. '
-                                                  'For plots, the panes give the cross-correlation product, '
-                                                  'independent TDOAS and derived TDOAs from top to bottom.')
+                                                  'TDOAs (in samples), and TDOAs derived from the independent ones. ')

    group1 = parser.add_argument_group('Channels')
    group1.add_argument('-c', '--channels', type=intlist, default=None,
@@ -258,14 +263,15 @@ if __name__ == "__main__":
                                                                    '--erase is not provide, the script will exit.')
    group4.add_argument('-n', '--no-hyperres', action='store_True', help='Disable the hyper resolution evalutation of '
                                                                          'the TDOA')
-    group4.add_argument('-M', '--mode', choices={'prepare', 'on-the-fly', 'estimate', 'auto'}, default='prepare',
+    group4.add_argument('-M', '--mode', choices={'prepare', 'on-the-fly', 'smart', 'auto'}, default='prepare',
                        help=f'R|How to explore the TDOA space (default: %(default)s).\n'
                             f'{BColors.BOLD}prepare{BColors.ENDC} precomputes all the possible TDOA pairs and then '
                             f'evaluate them. All the results are save in memory.\n'
                             f'{BColors.BOLD}on-the-fly{BColors.ENDC} compute the TDOA pairs at the same time as it '
                             f'compute the loss function. Only the maximum is saved. Can be slower than '
                             f'{BColors.BOLD}prepare{BColors.ENDC}.\n'
-                             f'{BColors.BOLD}estimate{BColors.ENDC} try to reduce the number of TDOA to evaluate.\n'
+                             f'{BColors.BOLD}smart{BColors.ENDC} gradually increase the search space dimension, '
+                             f'reducing the number of tdoa to evaluate.\n'
                             f'{BColors.BOLD}auto{BColors.ENDC} automatically try to pick the right method.')

    args = parser.parse_args()