# IPI_bombyx.py
import ipi_extract as ipi
import pandas as pd
import argparse
import numpy as np
import os
import matplotlib.pyplot as plt
from functools import partial
from matplotlib.widgets import Button
import sys
class BetterIter(object):
    """Cycling iterator that skips candidates already recorded as done.

    Candidates are drawn from ``to_wrap`` in order; when the sequence is
    exhausted it restarts from the beginning. On every call to ``next`` the
    shared completion file is re-read so that entries appended to it since
    the previous call are taken into account.

    A candidate is accepted when it is not listed in the completion file or,
    once the completion file holds at least ``len(to_wrap)`` entries
    (``overlap``), when it is not among the ``passage`` values of the frames
    in ``save`` that also appear in ``file_list``.

    NOTE(review): candidates are compared directly with the integers stored
    in the completion file. The caller in this file writes ``ipassage``
    values to that file but draws candidates as row positions; confirm that
    both use the same numbering.

    Args:
        to_wrap: sized, re-iterable collection of candidates.
        save: list of frames exposing ``passage.unique()``; the list is read
            again on every call, so frames appended later are seen.
        file_list: entries of the completion file known at start-up.
        done_file: optional readable, seekable text handle on the completion
            file (one integer per line). It may also be assigned to the
            ``done_file`` attribute after construction.
    """

    def __init__(self, to_wrap, save, file_list, done_file=None):
        self.wrapped = iter(to_wrap)
        self.max_num = len(to_wrap)
        self.to_wrap = to_wrap
        self.overlap = False
        self.curr_len = None
        self.save = save
        self.file_list = file_list
        # Handle on the completion file; must be set before the first draw.
        self.done_file = done_file
        # State refreshed on every draw, initialised here so that readers
        # never hit a missing attribute.
        self.list_pass = []
        self.usr_pass = set()
        self.current = None

    def __iter__(self):
        return self

    def _test_done(self):
        """Update the done counter and the overlap flag from ``list_pass``."""
        self.curr_len = len(self.list_pass)
        self.overlap = self.curr_len >= self.max_num

    def _read_done(self):
        """Reload the completion file and refresh the state derived from it."""
        if self.done_file is None:
            raise AttributeError('BetterIter.done_file must be set before iterating')
        self.done_file.seek(0)
        self.list_pass = [int(v) for v in self.done_file.read().split('\n') if len(v)]
        known = set(self.file_list)
        self.usr_pass = {v for f in self.save for v in f.passage.unique() if v in known}
        self._test_done()

    def __next__(self):
        """Return the next acceptable candidate and remember it in ``current``.

        Raises:
            StopIteration: if ``to_wrap`` is empty, or if a complete cycle
                over ``to_wrap`` contains no acceptable candidate (instead of
                looping forever).
            AttributeError: if no completion file handle has been provided.
        """
        # One read per call is enough: nothing inside the loop writes to the file.
        self._read_done()
        done = set(self.list_pass)
        # max_num consecutive draws visit every candidate exactly once, even
        # across a restart of the underlying iterator.
        for _ in range(self.max_num):
            try:
                val = next(self.wrapped)
            except StopIteration:
                self.wrapped = iter(self.to_wrap)
                val = next(self.wrapped)
            if val not in done or (self.overlap and val not in self.usr_pass):
                self.current = val
                return val
        raise StopIteration
def _next_file(event, refs, order, df, save, args, outpath, done_file, text):
    """Button callback: store the current annotations, then load another file.

    The annotations held by ``refs['callback'].df`` are turned into a frame,
    tagged with the metadata of the row ``order.current`` of ``df`` and with
    the annotator name, appended to ``save`` and the whole of ``save`` is
    written to ``outpath``. If the frame is not empty, the passage id is
    appended to ``done_file``. The next candidate is then drawn from
    ``order`` and loaded through ``ipi.reset``.

    Args:
        event: matplotlib event passed by the button (unused).
        refs: dict holding at least the ``'callback'`` and ``'fig'`` entries.
        order: iterator of row positions exposing ``current``, ``list_pass``
            and ``curr_len``.
        df: metadata table with ``filepredmax``, ``ipassage``, ``nbindiv``
            and ``predmax`` columns.
        save: list of annotation frames accumulated so far (mutated).
        args: parsed command line (``annotator``, ``wd``, ``channel``).
        outpath: path of the HDF5 output file.
        done_file: writable handle on the completion file.
        text: matplotlib text artist showing the progress counters.
    """
    current_row = df.iloc[order.current]
    tmp = pd.DataFrame.from_dict(refs['callback'].df, orient='index')
    tmp['file'] = current_row.filepredmax
    tmp['passage'] = current_row.ipassage
    tmp['nb_ind'] = current_row.nbindiv
    tmp['pred_max'] = current_row.predmax
    tmp['annotator'] = args.annotator
    save.append(tmp)
    # `key` is passed by keyword: recent pandas versions deprecate passing it
    # positionally.
    pd.concat(save).to_hdf(outpath, key='df')

    # Only files that actually received annotations are recorded as done.
    if len(tmp):
        done_file.write(f'{current_row.ipassage}\n')
        done_file.flush()

    ind = next(order)
    next_path = df.iloc[ind].filepredmax
    try:
        ipi.reset(refs['callback'], os.path.join(args.wd, next_path.strip('/')), args.channel)
        title = 'IPI of ' + next_path.rsplit('/', 1)[-1]
        canvas = refs['fig'].canvas
        # FigureCanvas.set_window_title was deprecated in matplotlib 3.4 in
        # favour of the manager's method; fall back to the canvas when no
        # manager is attached.
        manager = getattr(canvas, 'manager', None)
        if manager is not None:
            manager.set_window_title(title)
        else:
            canvas.set_window_title(title)
        done = set(order.list_pass)
        usr_files = [v for f in save for v in f.passage.unique() if v in done]
        text.set_text(f'{order.curr_len} files done\n{len(usr_files)}/{len(done)} by you')
    except (RuntimeError, FileNotFoundError) as e:
        print(e, 'Opening next file')
        # NOTE(review): the retry runs the whole callback again, so whatever
        # refs['callback'].df holds after the failed reset is saved under the
        # metadata of the file that could not be opened - confirm this is
        # intended.
        _next_file(event, refs, order, df, save, args, outpath, done_file, text)
def main(args):
    """Run an interactive annotation session and return the exit status.

    Loads the metadata table, optionally filters it by number of
    individuals, opens the annotation figure on a randomly drawn file, adds a
    progress label and a 'Next file' button, and saves the annotations of the
    last displayed file once ``plt.show()`` returns.

    Args:
        args: parsed command line with ``input``, ``annotator``, ``out``,
            ``channel``, ``nb_ind``, ``equal``, ``erase``, ``wd`` and
            ``done_file``.

    Returns:
        0 on normal termination.

    Raises:
        FileNotFoundError: if the completion file does not exist, or if no
            candidate file could be opened.
        RuntimeError: if no candidate file could be opened.
    """
    if args.out == '':
        outpath = args.input.rsplit('.', 1)[0] + f'_{args.annotator}.h5'
    else:
        outpath = args.out

    save = []
    if os.path.isfile(outpath) and not args.erase:
        save.append(pd.read_hdf(outpath))

    # NOTE: unpickling can execute arbitrary code; only load trusted inputs.
    df = pd.read_pickle(args.input)
    if not os.path.isfile(args.done_file):
        raise FileNotFoundError(f'Did not find the current completion file at {args.done_file}')

    # The context manager closes the completion file even when the session
    # ends with an exception.
    with open(args.done_file, 'a+') as done_file:
        done_file.seek(0)
        file_list = [int(v) for v in done_file.read().split('\n') if len(v)]

        # NOTE(review): this re-reads the same path when the table is empty,
        # and the flag set below is recomputed by BetterIter on each draw;
        # the branch looks vestigial - confirm before removing.
        overlap = False
        if not len(df):
            df = pd.read_pickle(args.input)
            overlap = True

        if args.nb_ind != -1:
            if args.equal:
                df = df[df.nbindiv == args.nb_ind]
            else:
                df = df[df.nbindiv <= args.nb_ind]

        samples_order = BetterIter(np.random.choice(len(df), len(df), replace=False), save, file_list)
        samples_order.done_file = done_file
        samples_order.overlap = overlap

        # Draw candidates until one can be opened, trying at most as many
        # times as there are rows; the last error is re-raised if all fail.
        last_error = None
        for _ in range(max(len(df), 1)):
            ind = next(samples_order)
            try:
                ref_dict = ipi.init(os.path.join(args.wd, df.iloc[ind].filepredmax.strip('/')), args.channel)
            except (RuntimeError, FileNotFoundError) as e:
                print(e, 'Opening next file')
                last_error = e
            else:
                break
        else:
            raise last_error

        # Progress label, placed on the last row of the figure's grid.
        text_ax = plt.subplot(ref_dict['gridspec'][-1, -4:-2])
        usr_files = [v for f in save for v in f.passage.unique() if v in file_list]
        text = text_ax.text(0.5, 0.5, f'{len(file_list)} files done\n{len(usr_files)}/{len(set(file_list))} by you',
                            horizontalalignment='center', verticalalignment='center',
                            transform=text_ax.transAxes)
        text_ax.axis('off')

        # 'Next file' button bound to the saving callback.
        next_file = partial(_next_file, refs=ref_dict, order=samples_order, df=df, save=save, args=args,
                            outpath=outpath, done_file=done_file, text=text)
        next_ax = plt.subplot(ref_dict['gridspec'][-1, -2:])
        next_button = Button(next_ax, 'Next file')
        next_button.on_clicked(next_file)

        plt.draw()
        plt.pause(0.2)
        ref_dict['fig'].set_constrained_layout(False)
        plt.show()

        # Reached once plt.show() returns: store the annotations of the file
        # displayed last.
        # NOTE(review): unlike the button callback, this final save does not
        # append the passage to the completion file - confirm intended.
        last_row = df.iloc[samples_order.current]
        tmp = pd.DataFrame.from_dict(ref_dict['callback'].df, orient='index')
        tmp['file'] = last_row.filepredmax
        tmp['passage'] = last_row.ipassage
        tmp['nb_ind'] = last_row.nbindiv
        tmp['pred_max'] = last_row.predmax
        save.append(tmp)
        df_ann = pd.concat(save)
        # The annotator name is written on every row, including rows loaded
        # from a previous output file.
        df_ann['annotator'] = args.annotator
        # `key` is passed by keyword: recent pandas versions deprecate passing
        # it positionally.
        df_ann.to_hdf(outpath, key='df')
    return 0
if __name__ == '__main__':
    # Command line entry point: parse the options and hand them to main().
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("input", type=str, help="Input metadata file")
    parser.add_argument("annotator", type=str, help="Your name")
    # The help text mirrors how main() builds the default output path.
    parser.add_argument("--out", type=str, default='',
                        help="Output file. Default to the input path with its extension replaced by"
                             " '_<annotator>.h5'")
    parser.add_argument("--channel", type=int, default=0,
                        help="Sound channel to be analysed. Indices start from 0.")
    parser.add_argument("--nb_ind", type=int, default=1,
                        help="Maximum number of individual for each track. -1 for no limit")
    parser.add_argument("--equal", action='store_true',
                        help="Adding this tag will limit the track with nb_ind sperm whales")
    parser.add_argument("--erase", action='store_true',
                        help="If out file exist and this option is not given,"
                             " the previous output will be read and appended")
    parser.add_argument("--wd", type=str, default='/nfs/NASDELL/SABIOD/SITE/BOMBYX/',
                        help='Path to root dir'
                             ' containing Bombyx files')
    parser.add_argument("--done_file", type=str,
                        default='/nfs/NASDELL/SABIOD/SITE/BOMBYX/manip_2021/IPI_scripts/done_file.csv',
                        help='Path to file listing files done')
    args = parser.parse_args()
    sys.exit(main(args))