# IPI_bombyx.py

import ipi_extract as ipi
import pandas as pd
import argparse
import numpy as np
import os
import matplotlib.pyplot as plt
from functools import partial
from matplotlib.widgets import Button
import sys


class BetterIter(object):
    """Cycling iterator over sample values that skips the ones already done.

    Every call to ``next()`` re-reads the completion file and returns the next
    value of ``to_wrap`` that is either not listed in that file or, once the
    file holds at least ``len(to_wrap)`` entries ("overlap" mode), not yet
    present in the user's own saved annotations. When the end of ``to_wrap``
    is reached, iteration restarts from its beginning.

    Attributes updated while iterating:
        list_pass: integers currently listed in the completion file.
        usr_pass: passages found in ``save`` that also belong to ``file_list``.
        curr_len: number of entries in ``list_pass``.
        overlap: ``True`` once ``curr_len >= max_num``.
        current: last value returned by ``next()`` (``None`` before that).
    """

    def __init__(self, to_wrap, save, file_list, done_file=None):
        """Store the iteration state.

        Args:
            to_wrap: sized iterable of candidate values.
            save: list of DataFrames with a ``passage`` column.
            file_list: passages used to filter the user's own annotations.
            done_file: seekable text file with one integer per line. It may
                also be assigned as an attribute after construction, but it
                must be set before the first ``next()`` call.
        """
        self.wrapped = iter(to_wrap)
        self.max_num = len(to_wrap)
        self.to_wrap = to_wrap
        self.overlap = False
        self.curr_len = None
        self.save = save
        self.file_list = file_list
        self.done_file = done_file
        self.list_pass = []
        self.usr_pass = set()
        self.current = None

    def __iter__(self):
        return self

    def _test_done(self):
        """Refresh ``curr_len`` and the ``overlap`` flag from ``list_pass``."""
        self.curr_len = len(self.list_pass)
        self.overlap = self.curr_len >= self.max_num

    def __next__(self):
        """Return the next eligible value.

        Raises:
            StopIteration: if ``to_wrap`` is empty, or if ``max_num``
                consecutive candidates were rejected under the same overlap
                state, i.e. no eligible value is left.
        """
        # `save` and `file_list` cannot change during a single call, so the
        # user's own passages are computed once rather than per candidate.
        known = set(self.file_list)
        self.usr_pass = {v for f in self.save for v in f.passage.unique() if v in known}

        rejected = 0
        previous_overlap = None
        while True:
            try:
                val = next(self.wrapped)
            except StopIteration:
                # End of the sequence: start over from the beginning.
                self.wrapped = iter(self.to_wrap)
                val = next(self.wrapped)

            # The completion file may have grown since the last read.
            self.done_file.seek(0)
            self.list_pass = [int(v) for v in self.done_file.read().split('\n') if v]
            done = set(self.list_pass)
            self._test_done()

            # Switching to overlap mode can make earlier rejections eligible
            # again, so the rejection count restarts whenever the mode changes.
            if self.overlap != previous_overlap:
                previous_overlap = self.overlap
                rejected = 0

            if val not in done or (self.overlap and val not in self.usr_pass):
                break

            rejected += 1
            if rejected >= self.max_num:
                # A whole pass found nothing eligible: stop instead of spinning.
                raise StopIteration
        self.current = val
        return val


def _next_file(event, refs, order, df, save, args, outpath, done_file, text):
    """Button callback: save the current annotations and load the next file.

    The annotation table held by ``refs['callback'].df`` is tagged with the
    metadata of the current sample, appended to ``save`` and the whole of
    ``save`` is written to ``outpath``. If the table is not empty, the current
    passage is appended to ``done_file``. The next sample is then requested
    from ``order`` and loaded with ``ipi.reset``; samples that fail to open
    are skipped.

    Args:
        event: event object passed by the button widget (unused).
        refs: dict with at least the ``'callback'`` and ``'fig'`` entries.
        order: sample iterator exposing ``current``, ``max_num``,
            ``list_pass`` and ``curr_len``.
        df: metadata table with ``filepredmax``, ``ipassage``, ``nbindiv``
            and ``predmax`` columns.
        save: list of annotation DataFrames, extended in place.
        args: parsed command-line arguments (``annotator``, ``wd``, ``channel``).
        outpath: path of the HDF5 output file.
        done_file: writable text file listing completed passages.
        text: matplotlib text artist showing the progress summary.

    Raises:
        RuntimeError, FileNotFoundError: the last opening error, if
            ``order.max_num`` consecutive samples could not be opened.
    """
    current = df.iloc[order.current]
    tmp = pd.DataFrame.from_dict(refs['callback'].df, orient='index')
    tmp['file'] = current.filepredmax
    tmp['passage'] = current.ipassage
    tmp['nb_ind'] = current.nbindiv
    tmp['pred_max'] = current.predmax
    tmp['annotator'] = args.annotator
    save.append(tmp)
    pd.concat(save).to_hdf(outpath, key='df')
    if len(tmp):
        # Only samples that actually received annotations are marked as done.
        done_file.write(f'{current.ipassage}\n')
        done_file.flush()

    # Look for the next sample that can be opened. Annotations are saved only
    # once above, so an unreadable file is never tagged with them.
    last_error = None
    for _ in range(max(order.max_num, 1)):
        candidate = df.iloc[next(order)]
        try:
            ipi.reset(refs['callback'], os.path.join(args.wd, candidate.filepredmax.strip('/')), args.channel)
        except (RuntimeError, FileNotFoundError) as e:
            print(e, 'Opening next file')
            last_error = e
            continue
        break
    else:
        raise last_error

    # The window title lives on the figure manager; the canvas-level setter
    # is no longer available in recent Matplotlib releases.
    refs['fig'].canvas.manager.set_window_title('IPI of ' + candidate.filepredmax.rsplit('/', 1)[-1])
    done = set(order.list_pass)
    usr_files = [v for f in save for v in f.passage.unique() if v in done]
    text.set_text(f'{order.curr_len} files done\n{len(usr_files)}/{len(done)} by you')


def _open_first_file(samples_order, df, args):
    """Advance ``samples_order`` until ``ipi.init`` succeeds and return its result.

    At most ``samples_order.max_num`` samples are tried; if none can be opened
    the last ``RuntimeError``/``FileNotFoundError`` is re-raised.
    """
    last_error = None
    for _ in range(max(samples_order.max_num, 1)):
        ind = next(samples_order)
        try:
            return ipi.init(os.path.join(args.wd, df.iloc[ind].filepredmax.strip('/')), args.channel)
        except (RuntimeError, FileNotFoundError) as e:
            print(e, 'Opening next file')
            last_error = e
    raise last_error


def main(args):
    """Run the interactive annotation session.

    Loads the metadata table and any previous output, shows the first sample
    that can be opened together with a progress text and a 'Next file' button,
    and, once ``plt.show()`` returns, writes the annotations of the sample
    currently displayed to the output file.

    Args:
        args: parsed command-line arguments (``input``, ``annotator``, ``out``,
            ``channel``, ``nb_ind``, ``equal``, ``erase``, ``wd``, ``done_file``).

    Returns:
        0 on completion.

    Raises:
        FileNotFoundError: if ``args.done_file`` does not exist.
    """
    if args.out == '':
        outpath = args.input.rsplit('.', 1)[0] + f'_{args.annotator}.h5'
    else:
        outpath = args.out
    save = []
    if os.path.isfile(outpath) and not args.erase:
        save.append(pd.read_hdf(outpath))
    # NOTE(review): read_pickle executes arbitrary code from the file; only
    # use metadata files from a trusted source.
    df = pd.read_pickle(args.input)
    if not os.path.isfile(args.done_file):
        raise FileNotFoundError(f'Did not find the current completion file at {args.done_file}')
    overlap = not len(df)
    if args.nb_ind != -1:
        if args.equal:
            df = df[df.nbindiv == args.nb_ind]
        else:
            df = df[df.nbindiv <= args.nb_ind]

    # The context manager guarantees the completion file is closed even when
    # the session aborts with an exception.
    with open(args.done_file, 'a+') as done_file:
        done_file.seek(0)
        file_list = [int(v) for v in done_file.read().split('\n') if v]
        samples_order = BetterIter(np.random.choice(len(df), len(df), replace=False), save, file_list)
        samples_order.done_file = done_file
        samples_order.overlap = overlap
        ref_dict = _open_first_file(samples_order, df, args)

        text_ax = plt.subplot(ref_dict['gridspec'][-1, -4:-2])
        known = set(file_list)
        usr_files = [v for f in save for v in f.passage.unique() if v in known]
        text = text_ax.text(0.5, 0.5, f'{len(file_list)} files done\n{len(usr_files)}/{len(known)} by you',
                            horizontalalignment='center', verticalalignment='center',
                            transform=text_ax.transAxes)
        text_ax.axis('off')
        next_file = partial(_next_file, refs=ref_dict, order=samples_order, df=df, save=save, args=args,
                            outpath=outpath, done_file=done_file, text=text)
        next_ax = plt.subplot(ref_dict['gridspec'][-1, -2:])
        # Keep a reference to the button so its callback stays connected.
        next_button = Button(next_ax, 'Next file')
        next_button.on_clicked(next_file)
        plt.draw()
        plt.pause(0.2)
        ref_dict['fig'].set_constrained_layout(False)
        plt.show()

        # Save the annotations of the sample that was on screen last.
        current = df.iloc[samples_order.current]
        tmp = pd.DataFrame.from_dict(ref_dict['callback'].df, orient='index')
        tmp['file'] = current.filepredmax
        tmp['passage'] = current.ipassage
        tmp['nb_ind'] = current.nbindiv
        tmp['pred_max'] = current.predmax
        save.append(tmp)
        df_ann = pd.concat(save)
        df_ann['annotator'] = args.annotator
        df_ann.to_hdf(outpath, key='df')
    return 0


if __name__ == '__main__':
    # Command-line entry point: parse the options and hand them to main(),
    # whose return value becomes the process exit status.
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("input", type=str, help="Input metadata file")
    parser.add_argument("annotator", type=str, help="Your name")
    # An empty --out makes main() derive the path from the input file name
    # and the annotator name.
    parser.add_argument("--out", type=str, default='',
                        help="Output file. Defaults to the input path without its extension + '_<annotator>.h5'")
    parser.add_argument("--channel", type=int, default=0, help="Sound channel to be analysed. Indices start from 0.")
    parser.add_argument("--nb_ind", type=int, default=1, help="Maximum number of individual for each track. -1 for no limit")
    parser.add_argument("--equal", action='store_true', help="Adding this tag will limit the track with nb_ind sperm whales")
    parser.add_argument("--erase", action='store_true', help="If out file exist and this option is not given,"
                                                             " the previous output will be read and appended")
    parser.add_argument("--wd", type=str, default='/nfs/NASDELL/SABIOD/SITE/BOMBYX/', help='Path to root dir'
                                                                                           ' containing Bombyx files')
    parser.add_argument("--done_file", type=str,
                        default='/nfs/NASDELL/SABIOD/SITE/BOMBYX/manip_2021/IPI_scripts/done_file.csv',
                        help='Path to file listing files done')

    args = parser.parse_args()
    sys.exit(main(args))