From efef6ebc388a61e002945e8b8988b630bf5b659c Mon Sep 17 00:00:00 2001 From: Stephane Chavin <stephane.chavin@lis-lab.fr> Date: Thu, 30 Jan 2025 16:47:43 +0100 Subject: [PATCH] correct split between annotations --- get_train_annot.py | 2 +- utils.py | 32 +++++++++++++++++++++----------- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/get_train_annot.py b/get_train_annot.py index a0f4c26..92ee2b4 100755 --- a/get_train_annot.py +++ b/get_train_annot.py @@ -35,7 +35,7 @@ def main(entry, arguments, species_list): offset_list = np.arange( 0, file_duration, arguments.duration - arguments.overlap) new_pos = utils.split_annotations( - grp[['start', 'stop']], arguments.duration) + grp, arguments.duration) grp = pd.merge(grp, new_pos) while len(grp) != 0: # collect all the data between the offset and duration-overlap diff --git a/utils.py b/utils.py index 104c4ca..4120728 100755 --- a/utils.py +++ b/utils.py @@ -490,23 +490,33 @@ def split_annotations(df, duration=8): current_chunk_end = (start // duration + 1) * duration if end > current_chunk_end: - if (current_chunk_end - start) >= (end - start) * 0.5: - # Split the annotation - splited_annotations.append( - {'start': start, 'stop': current_chunk_end}) - start = current_chunk_end + # Check for the first part of the annotation + if (current_chunk_end - start) > (end - start) * 0.2 and (current_chunk_end - start) < (end - start) * 0.80: + # Split the annotation into 2 new annotations + new_row = row.copy() + row['stop'] = current_chunk_end + new_row['start'] = current_chunk_end + splited_annotations.append(pd.DataFrame(row).T) + splited_annotations.append(pd.DataFrame(new_row).T) + break + elif (current_chunk_end - start) <= (end - start) * 0.2: + # If the first segment is less than 20% of the annotation + # only keep the second part + row['start'] = current_chunk_end + splited_annotations.append(pd.DataFrame(row).T) + break else: - # If the remaining segment is less than half of the annotation - # only keep the longest part - splited_annotations.append( - {'start': current_chunk_end, 'stop': end}) + # If the first segment is more than 80% of the annotation + # only keep the first part + row['stop'] = current_chunk_end + splited_annotations.append(pd.DataFrame(row).T) break else: # This annotation fits within the current chunk - splited_annotations.append({'start': start, 'stop': end}) + splited_annotations.append(pd.DataFrame(row).T) break - return pd.DataFrame(splited_annotations) + return pd.concat(splited_annotations) def get_box_shape(info, im): -- GitLab