diff --git a/README.md b/README.md
index 20813f125360f3941d0cb1657851163514c6e504..9ee0488bc025e68b2482bd69ca19ff1bc8622d82 100644
--- a/README.md
+++ b/README.md
@@ -23,6 +23,8 @@ usage: gsrp_tdoa_hyperres.py [-h] [-c CHANNELS] [-i INVERSE] [-f FRAME_SIZE]
 Arguments
 -----
 ```
+Computes TDOA estimates from a multi-channel recording.
+
 positional arguments:
   infile                The sound file to process.
   outfile               The text or npy file to write results to. Each row
@@ -34,6 +36,8 @@ positional arguments:
 
 optional arguments:
   -h, --help            show this help message and exit
+
+Channels:
   -c CHANNELS, --channels CHANNELS
                         The channels to cross-correlate. Accepts two or more,
                         but beware of high memory use. To be given as a comma-
@@ -43,15 +47,23 @@ optional arguments:
                         Inverse the channel. To be given as a comma-separated
                         list of numbers,with 0 referring to the first channel
                         once channels have been chosen by --channels.
+
+Size settings:
   -f FRAME_SIZE, --frame-size FRAME_SIZE
                         The size of the cross-correlation frames in seconds
                         (default: 0.2)
-  -s HOP_SIZE, --hop-size HOP_SIZE
+  -s STRIDE, --stride STRIDE
                         The step between the beginnings of sequential frames
                         in seconds (default: 0.01), or the postion in second
                         if csv file path is given.
   -m MAX_TDOA, --max-tdoa MAX_TDOA
                         The maximum TDOA in seconds (default: 0.0011).
+  -S SECONDS, --start SECONDS
+                        If given, only analyze from the given position.
+  -E SECONDS, --end SECONDS
+                        If given, only analyze up to the given position.
+
+Filtering:
   -l LOW, --low LOW     Bottom cutoff frequency. Disabled by default.
   -u UP, --up UP        Top cutoff frequency. Disabled by default.
   -d DECIMATE, --decimate DECIMATE
@@ -59,8 +71,23 @@ optional arguments:
                         default
   -t, --temporal        If given, any decimation will be applied in the time
                         domain instead of the spectral domain.
-  -e, --erase           Erase existing outfile. If outfile existe and --erase
+
+Other:
+  -e, --erase           Erase existing outfile. If outfile exist and --erase
                         is not provide, the script will exit.
-  --start SECONDS       If given, only analyze from the given position.
-  --end SECONDS         If given, only analyze up to the given position.
+  -n, --no-hyperres     Disable the hyper resolution evalutation of the TDOA
+  -M {estimate,auto,on-the-fly,prepare}, --mode {estimate,auto,on-the-fly,prepare}
+                        How to explore the TDOA space (default: prepare).
+                        'prepare' precomputes all the possible TDOA
+                        pairs and then evaluate them. All the results are save
+                        in memory.
+                        'on-the-fly' compute the TDOA pairs at the same
+                        time as it compute the loss function. Only the maximum
+                        is saved. Can be slower than 'prepare'.
+                        'estimate' try to reduce the number of TDOA to
+                        evaluate.
+                        'auto' automatically try to pick the right
+                        method.
+
+Process finished with exit code 0
 ```
diff --git a/c_corr.pyx b/c_corr.pyx
index a372c73ddf43136e6f60e32b9d175038ce08a5a0..0e67e22b99dacebe5aded8af5ec073f4f6beb480 100755
--- a/c_corr.pyx
+++ b/c_corr.pyx
@@ -20,7 +20,7 @@ ctypedef fused npType2:
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def c_corr(npType1[:,::1] cc, npType2[::1, :] taus):
+def c_corr_at(npType1[:,::1] cc, npType2[::1, :] taus):
     total = taus.shape[1]
     n_pair = taus.shape[0]
     cdef double max_corr_val = 0.0
@@ -33,4 +33,48 @@ def c_corr(npType1[:,::1] cc, npType2[::1, :] taus):
         if corr_val > max_corr_val:
             max_corr_val = corr_val
             index = i
-    return np.log10(max_corr_val), index
\ No newline at end of file
+    return np.log10(max_corr_val), index
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def c_corr_all(npType1[:,::1] cc, npType2 max_tdoa, npType2 base_tdoa):
+    cdef double corr_val = 0.0
+    taus = np.zeros(cc.shape[0], int)
+    out_taus = np.zeros(cc.shape[0], int)
+    _c_corr_core(cc, taus, out_taus,  corr_val, base_tdoa-1, base_tdoa, max_tdoa, cc.shape[1], cc.shape[0])
+    return np.log10(corr_val), out_taus
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def _c_corr_core(npType1[:,::1] cc, npType2[::1] taus, npType2[::1] out_taus, double val,
+                 npType2 curr_tdoa, npType2 ind_tdoa, npType2 max_tdoa, npType2 win_size, npType2 n_pair):
+    if curr_tdoa < 0:
+        cdef int i0 = 1
+        cdef int j0 = 0
+        for i in range(ind_tdoa, n_pair):
+            taus[i] = taus[i0] - taus[j0]
+            i0 += 1
+            if i0 >= ind_tdoa:
+                j0 += 1
+                i0 = j0 + 1
+        val = cc[0, taus[0]]
+        for i in range(1, n_pair):
+            val *= cc[i, taus[i] % win_size]
+        return val
+    cdef int low = -max_tdoa
+    cdef int high = max_tdoa
+    cdef double tmp_val = 0
+    for i in range(curr_tdoa+1, ind_tdoa):
+        if low < taus[i] - max_tdoa:
+            low = -max_tdoa + taus[i]
+        if high > max_tdoa + taus[i]:
+            high = max_tdoa + taus[i]
+    for t in range(low, high+1):
+        taus[curr_tdoa] = t
+        tmp_val = _c_corr_core(cc, taus, out_taus, val, curr_tdoa-1, ind_tdoa, max_size, win_size, n_pair)
+        if val < tmp_val:
+            val = tmp_val
+            out_taus[:] = taus
+    return val
\ No newline at end of file
diff --git a/gsrp_tdoa_hyperres.py b/gsrp_tdoa_hyperres.py
index 0365e7d16abc53604eeef75eeacd1ea5c345317e..27f8d05f6dcb5eee4137c2c3cca1913c80d069f6 100755
--- a/gsrp_tdoa_hyperres.py
+++ b/gsrp_tdoa_hyperres.py
@@ -9,8 +9,9 @@ import numpy as np
 from numpy.fft import rfft, irfft
 import scipy.signal as sg
 import soundfile as sf
-from c_corr import c_corr
+import c_corr
 from scipy.signal.windows import tukey
+
 try:
     from tqdm import trange
 except ImportError:
@@ -41,46 +42,62 @@ def slicer(down, up, ndim, n):
     return slices[index].reshape(ndim, -1).T
 
 
-def corr(data, sr, pos, w_size, max_tdoa, decimate=1):
+def corr(data, pos, w_size, max_tdoa, decimate=1, mode='prepare', hyper=True):
     num_channels = data.shape[1]
     num_channel_pairs = num_channels * (num_channels - 1) // 2
 
-    # apply zero-padding
-    data = np.pad(data, (((w_size - 1) // 2, (w_size - 1) // 2), (0, 0)),
-                  'constant')
-
-    # prepare window
-    win = np.expand_dims(tukey(w_size, 0.2), -1)
-
-    # prepare index matrices
+    data = np.pad(data, [((w_size - 1) // 2, (w_size - 1) // 2), (0, 0)], 'constant')
+    win = tukey(w_size, 0.2)[:, np.newaxis]
+    cc_size = min(w_size, int(2 * max_tdoa // decimate))
     v1 = np.empty(num_channel_pairs, np.int8)
     v2 = np.empty(num_channel_pairs, np.int8)
     mat = np.zeros((num_channel_pairs, num_channels - 1), np.int8)
     for k, (i, j) in enumerate(itertools.combinations(range(num_channels), 2)):
         if i > 0:
-            mat[k, i-1] = -1
-        mat[k, j-1] = 1
+            mat[k, i - 1] = -1
+        mat[k, j - 1] = 1
         v1[k] = i
         v2[k] = j
-    cc_size = min(w_size, int(2 * max_tdoa // decimate))
-    slices = slicer(-(cc_size // 2), cc_size // 2, (num_channels - 1), 16)
-    tausf = []
-    for j in range(len(slices)):
-        taus = np.mgrid[slices[j]].reshape(num_channels - 1, -1).astype(np.int16)
-        taus2 = np.matmul(mat, taus)
-        tausf += [taus2[:, np.abs(taus2).max(0) <= cc_size // 2]]
-    tausf = np.hstack(tausf)
-    dw_size = w_size // decimate
-    tausf %= dw_size
-
-    # run the TDOA search
+    if mode == 'prepare':
+        slices = slicer(-(cc_size // 2), cc_size // 2, (num_channels - 1), 16)
+        tausf = []
+        for j in range(len(slices)):
+            taus = np.mgrid[slices[j]].reshape(num_channels - 1, -1).astype(np.int16)
+            taus2 = np.matmul(mat, taus)
+            tausf += [taus2[:, np.abs(taus2).max(0) <= cc_size // 2]]
+        tausf = np.hstack(tausf)
+        dw_size = w_size // decimate
+        tausf %= dw_size
+    elif mode == 'on-the-fly':
+        pass
+    else:
+        raise ValueError(f'Unknown mode {mode}')
+
     tdoas = np.zeros((len(pos), num_channels), np.float32)
-    tdoas2 = np.zeros((len(pos), num_channels -1), np.float32)
+
+    if hyper:   # prepare hyper res
+        tdoas2 = np.zeros((len(pos), num_channels), np.float32)
+        poly = PolynomialFeatures(2)
+        lin = LinearRegression()
+        pipe = Pipeline([('poly', poly), ('lin', lin)])
+        ind = np.triu_indices(num_channels - 1)
+
+        def _hyperres(taus, cc):
+            taus = taus + np.stack(np.meshgrid(*(num_channels - 1) * (np.arange(-2, 3),)), 0).reshape(
+                num_channels - 1, -1).T
+            taus = np.matmul(mat, taus.T)
+            taus = taus[:, np.abs(taus).max(0) <= cc_size // 2]
+            mean = taus.mean(-1)[:3]
+            coef = pipe.fit(taus.T[:, :3] - mean,
+                            cc[np.expand_dims(np.arange(num_channel_pairs), 1), taus.astype(np.int)].prod(0)
+                            ).named_steps['lin'].coef_
+            der = np.zeros((num_channels - 1, num_channels - 1))
+            der[ind] = coef[4:]
+            poly_min = np.linalg.lstsq(der + der.T, -coef[1:4], rcond=None)[0] + mean
+            return pipe.predict(poly_min), poly_min
+
+
     cc = np.empty((num_channel_pairs, dw_size), np.float32)
-    poly = PolynomialFeatures(2)
-    lin = LinearRegression()
-    pipe = Pipeline([('poly',poly),('lin',lin)])
-    ind = np.triu_indices(num_channels -1)
     for i in trange(len(pos)):
         fft = rfft(win * data[pos[i]:w_size + pos[i]], axis=0)
         if decimate > 1:
@@ -88,22 +105,24 @@ def corr(data, sr, pos, w_size, max_tdoa, decimate=1):
         fft = np.asarray(fft, dtype=np.complex64)
         cc[:] = irfft(fft[:, v2] * np.conj(fft[:, v1]), axis=0).T
         cc -= cc.min(-1, keepdims=True)
-        val, index = c_corr(cc, tausf)
-        tdoas[i, 0] = val
-        tdoas[i, 1:] = ((tausf[:(num_channels - 1), index] + dw_size//2) % dw_size) - dw_size//2
-
-        # hyper resolution
-        taus = tdoas[i,1:num_channels] + np.stack(np.meshgrid(*(num_channels-1) * (np.arange(-2,3),)),0).reshape(num_channels-1, -1).T
-        taus = np.matmul(mat, taus.T)
-        taus = taus[:, np.abs(taus).max(0) <= cc_size // 2]
-        mean = taus.mean(-1)[:3]
-        coef = pipe.fit((taus).T[:,:3] - mean,
-                        cc[np.expand_dims(np.arange(num_channel_pairs), 1), taus.astype(np.int)].prod(0)
-                        ).named_steps['lin'].coef_
-        der = np.zeros((num_channels -1, num_channels - 1))
-        der[ind] = coef[4:]
-        tdoas2[i] = np.linalg.lstsq(der + der.T, -coef[1:4], rcond=None)[0] + mean
-    return np.hstack((np.expand_dims(pos, -1), tdoas)), np.hstack((np.expand_dims(pos, -1), tdoas2))
+        if mode == 'prepare':
+            tdoas[i, 0], index = c_corr.c_corr_at(cc, tausf)
+            tdoas[i, 1:] = ((tausf[:(num_channels - 1), index] + dw_size // 2) % dw_size) - dw_size // 2
+        elif mode == 'on-the-fly':
+            tdoas[i, 0], tdoas[i, 1:] = c_corr.c_corr_all(cc, cc_size//2, num_channels - 1)
+        else:
+            raise ValueError(f'Unknown mode {mode}')
+
+        if hyper:
+            tdoas[i, 0], tdoas2[i, 1:] = _hyperres(tdoas[i, 1:], cc)
+
+    if hyper:
+        return np.hstack((np.expand_dims(pos, -1), tdoas)), np.hstack((np.expand_dims(pos, -1), tdoas2))
+    else:
+        return np.hstack((np.expand_dims(pos, -1), tdoas))
+
+
+
 
 
 def main(args):
@@ -114,7 +133,8 @@ def main(args):
     # load audio file
     print(f'Loading {args.infile}...')
     sr = sf.info(args.infile).samplerate
-    sound, sr = sf.read(args.infile, start=int(sr*args.start), stop=int(sr*args.end) if args.end is not None else None,
+    sound, sr = sf.read(args.infile, start=int(sr * args.start),
+                        stop=int(sr * args.end) if args.end is not None else None,
                         dtype=np.float32, always_2d=True)
     if args.channels is not None:
         sound = sound[:, args.channels]
@@ -132,11 +152,11 @@ def main(args):
         print("Filtering...")
         if args.low is not None:
             if args.highpass is None:
-                sos = sg.butter(3, 2*args.low/sr, 'highpass', output='sos')
+                sos = sg.butter(3, 2 * args.low / sr, 'highpass', output='sos')
             else:
-                sos = sg.butter(3, [2*args.low/sr, 2*args.up/sr], 'bandpass', output='sos')
+                sos = sg.butter(3, [2 * args.low / sr, 2 * args.up / sr], 'bandpass', output='sos')
         else:
-            sos = sg.butter(3, 2*args.up/sr, 'lowpass', output='sos')
+            sos = sg.butter(3, 2 * args.up / sr, 'lowpass', output='sos')
         sound = sg.sosfiltfilt(sos, sound, axis=0)
 
     if args.decimate and args.temporal:
@@ -144,20 +164,17 @@ def main(args):
         sr /= args.decimate
 
     # Position where the TDOAs are computed
-    if os.path.isfile(args.hop_size):
-        pos = (sr * np.loadtxt(args.hop_size, delimiter=',')).astype(int).ravel()
+    if os.path.isfile(args.stride):
+        pos = (sr * np.loadtxt(args.stride, delimiter=',')).astype(int).ravel()
     else:
         try:
-            pos = np.arange(0, len(sound), int(sr * float(args.hop_size)))
+            pos = np.arange(0, len(sound), int(sr * float(args.stride)))
         except ValueError:
-            raise ValueError(f'Error: hop size {args.hop_size} is neither an existing file nor a float')
-
+            raise ValueError(f'Error: hop size {args.stride} is neither an existing file nor a float')
 
     print("Computing TDOAs...")
-    result1, result2 = corr(sound, sr, pos, int(sr * args.frame_size),
-                  max_tdoa=int(np.ceil(sr * args.max_tdoa)),
-                  decimate=(args.decimate
-                            if not args.temporal else 1))
+    result1 = corr(sound, pos, int(sr * args.frame_size), max_tdoa=int(np.ceil(sr * args.max_tdoa)),
+                   decimate=args.decimate if not args.temporal else 1, mode=args.mode, hyper=not args.no_hyperes)
 
     # compute additional non-independent TDOAs
     additional1 = [(b - a) for a, b in itertools.combinations(result1.T[2:], 2)]
@@ -176,7 +193,21 @@ def main(args):
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description='Computes TDOA estimates from a multi-channel recording.')
+    class SmartFormatter(argparse.HelpFormatter):
+        """
+        Allow to change argparse formating behaviour for one option by adding \'R|\' at the start of the help
+        """
+
+        def _split_lines(self, text, width):
+            if text.startswith('R|'):
+                return [l for t in text[2:].splitlines() for l in argparse.HelpFormatter._split_lines(self, t, width)]
+                # this is the RawTextHelpFormatter._split_lines
+            return argparse.HelpFormatter._split_lines(self, text, width)
+
+
+    parser = argparse.ArgumentParser(description='Computes TDOA estimates from a multi-channel recording.',
+                                     formatter_class=SmartFormatter)
+
     parser.add_argument('infile', type=str, help='The sound file to process.')
     parser.add_argument('outfile', type=str, help='The text or npy file to write results to. Each row gives the '
                                                   'position (in samples), cross-correlation product, the independent '
@@ -184,38 +215,59 @@ if __name__ == "__main__":
                                                   'For plots, the panes give the cross-correlation product, '
                                                   'independent TDOAS and derived TDOAs from top to bottom.')
 
-    parser.add_argument('-c', '--channels', type=intlist, default=None,
+    group1 = parser.add_argument_group('Channels')
+    group1.add_argument('-c', '--channels', type=intlist, default=None,
                         help='The channels to cross-correlate. Accepts two or more,  but beware of high memory use. To '
                              'be given as a comma-separated list of numbers, with 0 referring to the first channel '
                              '(default: all channels).')
-    parser.add_argument('-i', '--inverse', type=intlist, default=None,
+    group1.add_argument('-i', '--inverse', type=intlist, default=None,
                         help='Inverse the channel. To be given as a comma-separated list of numbers,'
                              'with 0 referring to the first channel once channels have been chosen by --channels.')
 
-    parser.add_argument('-f', '--frame-size', type=float, default=0.2,
+    group2 = parser.add_argument_group('Size settings')
+    group2.add_argument('-f', '--frame-size', type=float, default=0.2,
                         help='The size of the cross-correlation frames in seconds  (default: %(default)s)')
-    parser.add_argument('-s', '--hop-size', type=str, default=0.01,
+    group2.add_argument('-s', '--stride', type=str, default=0.01,
                         help='The step between the beginnings of sequential frames  in seconds (default: %(default)s), '
                              'or the postion in second if csv file path is given.')
-    parser.add_argument('-m', '--max-tdoa', type=float, default=0.0011,
+    group2.add_argument('-m', '--max-tdoa', type=float, default=0.0011,
                         help='The maximum TDOA in seconds (default: %(default)s).')
 
-    parser.add_argument('-l', '--low', type=float, default=None, help='Bottom cutoff frequency. Disabled by default.')
-    parser.add_argument('-u', '--up', type=float, default=None, help='Top cutoff frequency. Disabled by default.')
+    group2.add_argument('-S', '--start', metavar='SECONDS', type=float, default=0,
+                        help='If given, only analyze from the given position.')
+    group2.add_argument('-E', '--end', metavar='SECONDS', type=float, default=None,
+                        help='If given, only analyze up to the given position.')
 
-    parser.add_argument('-d', '--decimate', type=int, default=1, help='Downsample the signal by the given factor. '
+    group3 = parser.add_argument_group('Filtering')
+    group3.add_argument('-l', '--low', type=float, default=None, help='Bottom cutoff frequency. Disabled by default.')
+    group3.add_argument('-u', '--up', type=float, default=None, help='Top cutoff frequency. Disabled by default.')
+    group3.add_argument('-d', '--decimate', type=int, default=1, help='Downsample the signal by the given factor. '
                                                                       'Disabled by default')
-    parser.add_argument('-t', '--temporal', action='store_true', help='If given, any decimation will be applied in the '
+    group3.add_argument('-t', '--temporal', action='store_true', help='If given, any decimation will be applied in the '
                                                                       'time domain instead of the spectral domain.')
 
-    parser.add_argument('-e', '--erase', action='store_false', help='Erase existing outfile. If outfile existe and '
+    group4 = parser.add_argument_group('Other')
+    group4.add_argument('-e', '--erase', action='store_false', help='Erase existing outfile. If outfile exist and '
                                                                     '--erase is not provide, the script will exit.')
-
-    parser.add_argument('--start', metavar='SECONDS', type=float, default=0,
-                        help='If given, only analyze from the given position.')
-    parser.add_argument('--end', metavar='SECONDS', type=float, default=None,
-                        help='If given, only analyze up to the given position.')
+    group4.add_argument('-n', '--no-hyperres', action='store_True', help='Disable the hyper resolution evalutation of '
+                                                                          'the TDOA')
+    group4.add_argument('-M', '--mode', choices={'prepare', 'on-the-fly', 'estimate', 'auto'}, default='prepare',
+                        help=f'R|How to explore the TDOA space (default: %(default)s).\n'
+                             f'{BColors.BOLD}prepare{BColors.ENDC} precomputes all the possible TDOA pairs and then '
+                             f'evaluate them. All the results are save in memory.\n'
+                             f'{BColors.BOLD}on-the-fly{BColors.ENDC} compute the TDOA pairs at the same time as it '
+                             f'compute the loss function. Only the maximum is saved. Can be slower than '
+                             f'{BColors.BOLD}prepare{BColors.ENDC}.\n'
+                             f'{BColors.BOLD}estimate{BColors.ENDC} try to reduce the number of TDOA to evaluate.\n'
+                             f'{BColors.BOLD}auto{BColors.ENDC} automatically try to pick the right method.')
 
     args = parser.parse_args()
+    try:
+        if args.mode in ['estimate', 'auto']:
+            raise NotImplementedError(f'mode {args.mode} is not yet implemented')
+
+        sys.exit(main(args))
 
-    sys.exit(main(args))
+    except Exception as e:
+        print(e)
+        sys.exit(2)