diff --git a/get_train_annot.py b/get_train_annot.py index a0f4c267e029db59dfaac9a7c70f2efcc01404b7..92ee2b4f7f3b6babba76ddccd57cea9db5a28107 100755 --- a/get_train_annot.py +++ b/get_train_annot.py @@ -35,7 +35,7 @@ def main(entry, arguments, species_list): offset_list = np.arange( 0, file_duration, arguments.duration - arguments.overlap) new_pos = utils.split_annotations( - grp[['start', 'stop']], arguments.duration) + grp, arguments.duration) grp = pd.merge(grp, new_pos) while len(grp) != 0: # collect all the data between the offset and duration-overlap diff --git a/utils.py b/utils.py index 104c4ca861d66ed5ca5eda468676c4e05fb37b38..412072869876859348a46f8dfaaa1dc3d50481f8 100755 --- a/utils.py +++ b/utils.py @@ -490,23 +490,33 @@ def split_annotations(df, duration=8): current_chunk_end = (start // duration + 1) * duration if end > current_chunk_end: - if (current_chunk_end - start) >= (end - start) * 0.5: - # Split the annotation - splited_annotations.append( - {'start': start, 'stop': current_chunk_end}) - start = current_chunk_end + # Check for the first part of the annotation + if (current_chunk_end - start) > (end - start) * 0.2 and (current_chunk_end - start) < (end - start) * 0.80: + # Split the annotation into 2 new annotations + new_row = row.copy() + row['stop'] = current_chunk_end + new_row['start'] = current_chunk_end + splited_annotations.append(pd.DataFrame(row).T) + splited_annotations.append(pd.DataFrame(new_row).T) + break + elif (current_chunk_end - start) <= (end - start) * 0.2: + # If the first segment is less than 20% of the annotation + # only keep the second part + row['start'] = current_chunk_end + splited_annotations.append(pd.DataFrame(row).T) + break else: - # If the remaining segment is less than half of the annotation - # only keep the longest part - splited_annotations.append( - {'start': current_chunk_end, 'stop': end}) + # If the first segment is more than 80% of the annotation + # only keep the first part + row['stop'] = current_chunk_end + splited_annotations.append(pd.DataFrame(row).T) break else: # This annotation fits within the current chunk - splited_annotations.append({'start': start, 'stop': end}) + splited_annotations.append(pd.DataFrame(row).T) break - return pd.DataFrame(splited_annotations) + return pd.concat(splited_annotations) def get_box_shape(info, im):