Commit f27548b1 authored by Stephane Chavin

V2

parent ea581844
cff-version: 1.2.0
preferred-citation:
  type: Method
  message: If you use the RAVEN2YOLO method, please cite it as below.
  authors:
    - family-names: Chavin
      given-names: Stéphane
@@ -9,4 +9,4 @@ preferred-citation:
  version: 1.0
  date-released: 2023-02-16
  license: GPL-3.0
  url: "https://gitlab.lis-lab.fr/stephane.chavin/raven2yolo"
@@ -5,29 +5,10 @@ import argparse
import numpy as np
import soundfile
from p_tqdm import p_map
<<<<<<< HEAD
from tqdm import tqdm
import utils
import pandas as pd
=======
import soundfile as sf
import scipy.signal as signal
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')
def arg_directory(path):
    if os.path.isdir(path):
        return path
    else:
        raise argparse.ArgumentTypeError(f'`{path}` is not a valid path')
def create_spectrogram(y, directory, filename, offset, duration, window_arg, hop_length_arg):
    window = np.hanning(window_arg)
    stft = librosa.core.spectrum.stft(y, n_fft=window_arg, hop_length=hop_length_arg, window=window)
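    # With the defaults used later in this script (window_arg=1024,
    # hop_length_arg=512), the STFT has 1024 // 2 + 1 = 513 frequency bins
    # and roughly len(y) / 512 time frames.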
>>>>>>> 729ca99066972cd2d914d6054b6e8885d8e9c856
def main(data, arguments):
    """
@@ -44,13 +25,8 @@ def main(data, arguments):
        print(f'`{filename}` cannot be opened: {error}')
        return
<<<<<<< HEAD
    # Create the list of all possible offsets to compute spectrograms
    offset_list = np.arange(0, file_duration, arguments.duration - arguments.overlap)
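    # Worked example with assumed values: a 20 s file with duration=8 and
    # overlap=2 gives np.arange(0, 20, 8 - 2) -> [0, 6, 12, 18], i.e. one
    # spectrogram starting every 6 s.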
=======
    log_stft = np.log10(np.abs(stft))
    vmin, vmax = log_stft.mean(), log_stft.max()
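    # Taking log10 of the STFT magnitude compresses the dynamic range; the
    # mean and max of the log spectrum give usable display bounds (vmin, vmax).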
>>>>>>> 729ca99066972cd2d914d6054b6e8885d8e9c856
    for offset in offset_list:
        file = filename.replace('/', '_').split('.', maxsplit=1)[0]
@@ -58,6 +34,7 @@ def main(data, arguments):
        sig, fs = soundfile.read(filename, start=int(
            offset*fs), stop=int((offset+arguments.duration)*fs),
            always_2d=True)  # Load the signal
        sig = sig[:, 0]  # Only take channel 0
        # Apply resample and low/high pass filter
        sig = utils.signal_processing(
@@ -73,7 +50,6 @@ def main(data, arguments):
        folder = 'spectrograms'
        name = os.path.join(arguments.directory, folder, f'{file}_{offset}')
<<<<<<< HEAD
        utils.create_spectrogram(
            sig, arguments.directory, name, window_size=arguments.window,
            overlap=arguments.hop)
@@ -84,8 +60,7 @@ def main(data, arguments):
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Extract spectrogram for each .wav file')
    parser.add_argument('path', type=utils.arg_directory,
                        help='Path of the folder/file that contains the recordings')
    parser.add_argument('directory', type=utils.arg_directory,
@@ -127,87 +102,3 @@ if __name__ == "__main__":
    final_dest = os.path.join(directory, dest[0] if isinstance(dest, list) else dest)
    print(f'Saved to {final_dest}')
=======
def process_recordings(data, img_per_rec, args):
    _, (i) = data
    duration = args.duration
    overlap = args.overlap
    filename = str(i[0])
    try:
        info = sf.info(filename)
        file_duration, fs = info.duration, info.samplerate
    except Exception as error:
        print(f'`{filename}` cannot be opened: {error}')
        return
    for count in range(img_per_rec):
        offset = count * (duration - overlap)
        if offset > file_duration:
            continue
        try:
            sig, fs = sf.read(filename, start=int(offset*fs), stop=int((offset+duration)*fs), always_2d=True)
            sig = sig[:, 0]
            if not args.sr:
                args.sr = fs
            sig = signal_processing(sig, args.sr, fs, args.up, args.low)
            create_spectrogram(sig, args.directory, filename, offset, duration, args.window, args.hop)
        except Exception:
            print(f'`{filename}` cannot be opened...')

def signal_processing(sig, sr, fs, up, low):
    sig_r = signal.resample(sig, int(len(sig)*sr/fs))  # resample to the target rate
    if up:
        sos = signal.butter(2, up/(sr/2), 'hp', output='sos')  # create high-pass filter
        sig_r = signal.sosfilt(sos, sig_r)  # apply high-pass filter
    if low:
        sos2 = signal.butter(1, low/(sr/2), 'lp', output='sos')  # create low-pass filter
        sig_r = signal.sosfilt(sos2, sig_r)  # apply low-pass filter
    return sig_r
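# Usage sketch for signal_processing (all values assumed): one second of a
# 440 Hz tone at 44.1 kHz, resampled to 22.05 kHz with a 10 Hz high-pass:
#   sig = np.sin(2 * np.pi * 440 * np.linspace(0, 1, 44100, endpoint=False))
#   out = signal_processing(sig, sr=22050, fs=44100, up=10, low=None)
#   len(out) -> 22050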
if __name__ == "__main__":
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Extract spectrogram for each .wav file')
    parser.add_argument('path_to_data', type=arg_directory, help='Path of the folder that contains the recordings')
    parser.add_argument('directory', type=arg_directory, help='Directory to which spectrograms will be stored')
    parser.add_argument('-m', '--mode', type=str, choices=['unique', 'multiple'], help='if unique, only 1 image per file', default='multiple')
    parser.add_argument('-n', '--columns_name', type=str, help='Name of the column that contains the path of the .wav', default='Path')
    parser.add_argument('-f', '--file', type=str, help='Name of the file that contains the recordings to process', default='None')
    parser.add_argument('--frames', type=int, help='Number of spectrograms per file', default=30)
    parser.add_argument('--duration', type=int, help='Duration of each spectrogram', default=8)
    parser.add_argument('--overlap', type=int, help='Overlap between 2 spectrograms', default=2)
    parser.add_argument('--sr', type=int, help='Sampling rate for the spectrogram. If no argument, '
                        'SR will be the original SR of the recording', default=None)
    parser.add_argument('--window', type=int, help='Window size for the Fourier Transform', default=1024)
    parser.add_argument('--hop', type=int, help='Hop length for the Fourier Transform', default=512)
    parser.add_argument('--cpu', type=int, help='To speed up the process, write 2 or more', default=1)
    parser.add_argument('--up', type=int, help='High Pass Filter value in Hz', default=10)
    parser.add_argument('--low', type=int, help='Low Pass Filter value in Hz', default=None)
    args = parser.parse_args()

    if args.mode == 'multiple':
        img_per_rec = args.frames
    elif args.mode == 'unique':
        img_per_rec = 1

    path_to_data = args.path_to_data
    if args.file != 'None':
        try:
            df = pd.read_csv(args.file, low_memory=False)
        except Exception as error:
            print(f'Could not read as csv ({error}), trying to load as pickle...')
            df = pd.read_pickle(args.file)
        df['Path'] = df[args.columns_name]
    else:
        df = pd.DataFrame(glob.glob(os.path.join(path_to_data, '*'), recursive=True), columns=['Path'])

    if args.cpu == 1:
        for num, row in tqdm(df.iterrows(), total=len(df)):
            process_recordings([num, [row.Path]], img_per_rec, args)
        final_dest = os.path.join(args.directory, 'Spectrogram')
        print(f'Saved to {final_dest}')
    else:
        img_per_rec = [img_per_rec]*len(df.groupby('Path'))
        args = [args]*len(df.groupby('Path'))
        p_map(process_recordings, enumerate(df.groupby('Path')), img_per_rec, args, num_cpus=args[0].cpu, total=len(df.groupby('Path')))
        final_dest = os.path.join(args[0].directory, 'Spectrogram')
        print(f'Saved to {final_dest}')
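# Command-line usage sketch (script name and paths are assumed):
#   python get_spectrogram.py /data/recordings /data/output --duration 8 --overlap 2 --cpu 4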
>>>>>>> 729ca99066972cd2d914d6054b6e8885d8e9c856
<<<<<<< HEAD
"""Compiles detections into a dataframe and/or into Raven annotation format (.txt)""" """Compiles detections into a dataframe and/or into Raven annotation format (.txt)"""
import argparse import argparse
@@ -8,40 +7,6 @@ import utils
import xarray as xr
import pandas as pd
=======
import pandas as pd
import os
import argparse
import yaml
from datetime import date
from tqdm import tqdm
def arg_directory(path):
    if os.path.isdir(path):
        return path
    else:
        raise argparse.ArgumentTypeError(f'`{path}` is not a valid path')
def process_annotations(annotations_folder, duration, outdir, sr, names):
    today = date.today()
    out_file = f'YOLO_detection_{today.day}_{today.month}_freq_{sr}_duration_{duration}'
    df_list = []
    for file_name in tqdm(os.listdir(annotations_folder)):
        if file_name.endswith('.txt'):
            file_path = os.path.join(annotations_folder, file_name)
            try:
                annotation_df = pd.read_csv(file_path, sep=' ', names=['espece', 'x', 'y', 'w', 'h', 'conf'])
            except Exception:
                annotation_df = pd.read_csv(file_path, sep=' ', names=['espece', 'x', 'y', 'w', 'h'])
            annotation_df['file'] = file_name
            annotation_df['idx'] = annotation_df['file'].str.split('_').str[-1].str.split('.').str[0]
            annotation_df['file'] = ['.'.join(x.file.split('.')[:-1]) for i, x in annotation_df.iterrows()]
            annotation_df.file = annotation_df.file + '.wav'
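            # Each detection line parsed above is "class x_center y_center
            # width height [confidence]" with coordinates normalised to
            # [0, 1], e.g. (values assumed) "0 0.512 0.430 0.120 0.085 0.87".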
>>>>>>> 729ca99066972cd2d914d6054b6e8885d8e9c856
def main(arguments):
    """
@@ -78,7 +43,6 @@ def main(arguments):
if __name__ == "__main__":
<<<<<<< HEAD
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter,
                                     description='Collect detections and return a complete dataframe')
    parser.add_argument('path_to_data', type=utils.arg_directory,
@@ -98,19 +62,3 @@ if __name__ == "__main__":
    args = parser.parse_args()
    main(args)
=======
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Collect detections and return a complete dataframe')
    parser.add_argument('path_to_data', type=arg_directory, help='Path of the folder that contains the .txt files')
    parser.add_argument('directory', type=arg_directory, help='Directory where the dataframe will be stored')
    parser.add_argument('names', type=str, help='path to YOLOv5 custom_data.yaml file')
    parser.add_argument('-s', '--sr', type=int, help='Sampling Rate of the spectrogram', required=True)
    parser.add_argument('--duration', type=int, help='Duration of the spectrogram', default=8)
    args = parser.parse_args()

    with open(args.names, 'r') as file:
        data = yaml.safe_load(file)
    names = data['names']
    process_annotations(args.path_to_data, args.duration, args.directory, args.sr, names)
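    # Command-line usage sketch (script name and paths are assumed):
    #   python get_yolo_detection.py runs/detect/exp/labels out/ custom_data.yaml -s 22050 --duration 8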
>>>>>>> 729ca99066972cd2d914d6054b6e8885d8e9c856
@@ -3,20 +3,21 @@
import argparse
import random
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import soundfile as sf
import cv2
import sys
import subprocess
import utils
from p_tqdm import p_map
from tqdm import tqdm
def process(entry, arguments, species_list):
    """
    Process the annotation to get the .jpg spectrogram and the .txt annotation file
    :param entry (tuple): Enumerate number, [filename, group] per file
@@ -28,8 +29,8 @@ def process(entry, arguments, species_list, colors):
    try:
        info = sf.info(filename)  # Collect recording information
        file_duration, fs = info.duration, info.samplerate
    except Exception as error:
        print(f'`{filename}` cannot be opened: {error}')
        return
    # create the time list between 0 and 1000 * duration
@@ -62,12 +63,12 @@ def process(entry, arguments, species_list, colors):
        sig, fs = sf.read(filename, start=int(
            offset*fs), stop=int((offset+arguments.duration)*fs), always_2d=True)  # Load the signal
        sig = sig[:, 0]  # Only take channel 0
        if arguments.rf is None:
            arguments.rf = fs
        # Apply resample and low/high pass filter
        sig = utils.signal_processing(
            sig, rf=arguments.rf, fs=fs, high=arguments.high, low=arguments.low)
        fig = utils.create_spectrogram(
            sig, arguments.directory, names=None,
            window_size=arguments.window,
            overlap=arguments.hop)
@@ -168,13 +169,11 @@ if __name__ == '__main__':
    parser.add_argument(
        '--rf', type=int, help='Frequency resampling', default=None)
    parser.add_argument(
        '--window', type=int, help='Window size for the Fourier Transform', default=1024)
    parser.add_argument(
        '--hop', type=float, help='Ratio of hop in window : 50%% = 0.5', default=.5)
    parser.add_argument(
        '--cpu', type=int, help='To speed up the process, write 2 or more', default=1)
    parser.add_argument('--high', type=int,
                        help='High Pass Filter value in Hz', default=10)
    parser.add_argument('--low', type=int,
@@ -192,18 +191,16 @@ if __name__ == '__main__':
        for file, _ in df.groupby('Path'):
            new_df = utils.open_file(file)
            if len(new_df) >= 1:
                new_df['Path'] = os.path.join(args.path_to_data, str(file.split('.Table')[0]+f'.{suffix}'))
                final.append(new_df)
            else:
                continue
        df = pd.concat(final)
    elif 'Path' not in df.columns:
        df['Path'] = os.path.join(args.path_to_data, args.filename_path.split('/')[-1].split('.Table')[0]+f'.{suffix}')
    df, species = utils.prepare_dataframe(df, args)
    colors = [(random.randint(0, 255), random.randint(0, 255),
               random.randint(0, 255)) for _ in range(len(species))]
    species.to_csv(os.path.join(
@@ -212,13 +209,12 @@ if __name__ == '__main__':
    if args.cpu == 1:
        for i in tqdm(enumerate(df.groupby('Path')), total=len(df.groupby('Path')),
                      desc="Processing", ascii='░▒▓█'):
            process(i, args, species)
    else:
        args = [args for _ in range(len(df.groupby('Path')))]
        species = [species for _ in range(len(df.groupby('Path')))]
        p_map(process, enumerate(df.groupby('Path')), args,
              species, num_cpus=args[0].cpu, total=len(df.groupby('Path')))
    args = args[0]
    print('saved to', args.directory)
@@ -241,6 +237,7 @@ if __name__ == '__main__':
    try:
        # Run the split command
        os.system(f'{sys.executable} {script} {data_path} {directory_path} -r 0.7 --test')
        print(f'Train saved in {directory_path}\n')
        print('To train your model, use the following command: \n')
@@ -253,8 +250,7 @@ if __name__ == '__main__':
        print(command, '\n')
        print('\u26A0\uFE0F Be aware that it is recommended to have background images that',
              ' represent 10% of your dataset. To do so, please use the script "get_spectrogram.py"',
              ' with the --background argument. Compute on recordings that contain multiple types of noise...')
    except Exception as error:
        print(error)
<<<<<<< HEAD
"""Separates training and validation datasets in a balanced manner""" """Separates training and validation datasets in a balanced manner"""
import argparse import argparse
@@ -56,129 +55,10 @@ def export_split(entry, path, directory):
        os.path.join(directory, 'custom_data.yaml'))
    with open(os.path.join(directory, 'custom_data.yaml'), 'w', encoding='utf-8') as f:
=======
import yaml
import os
import pandas as pd
from tqdm import tqdm
import numpy as np
import shutil
import argparse
def arg_directory(path):
    if os.path.isdir(path):
        return path
    else:
        raise argparse.ArgumentTypeError(f'`{path}` is not a valid path')
def create_directory_if_not_exists(directory):
    if not os.path.exists(directory):
        os.mkdir(directory)
def copy_files_to_directory(file_list, source_dir, destination_dir, suffix):
    for file_name in file_list:
        source_path = os.path.join(source_dir, f'{file_name}.{suffix}')
        destination_path = os.path.join(destination_dir, f'{file_name}.{suffix}')
        shutil.copy2(source_path, destination_path)
def split(df, ratio):
    classes = df.espece.unique()
    n_class = classes.size
    train_count = pd.DataFrame(np.zeros((n_class, 1)), index=classes)
    test_count = train_count.copy()
    train_df = pd.DataFrame()
    test_df = pd.DataFrame()
    for i, c in enumerate(classes):
        try:
            sdf = df.groupby('espece').get_group(c)
        except Exception:
            continue
        if train_count.loc[c].iloc[0] == 0:
            f = sdf.sample(1).file.iloc[0]
            mask = df.file == f
            train_count = train_count.add(df[mask].espece.value_counts(), axis=0).fillna(0)
            train_df = pd.concat([train_df, df[mask]])
            df = df[~mask]
        if test_count.loc[c].iloc[0] == 0:
            f = sdf.sample(1).file.iloc[0]
            mask = df.file == f
            test_count = test_count.add(df[mask].espece.value_counts(), axis=0).fillna(0)
            test_df = pd.concat([test_df, df[mask]])
            df = df[~mask]
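    # Greedy pass below: while annotations remain, take the rarest remaining
    # class and send one of its files to test when the train share exceeds
    # `ratio`, otherwise to train; all annotations of a file move together so
    # a recording never straddles the split.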
    while len(df):
        min_esp = df.groupby('espece').count().file.idxmin()
        sdf = df.groupby('espece').get_group(min_esp)
        f = sdf.sample(1).file.iloc[0]
        if (train_count.loc[min_esp]/(test_count.loc[min_esp] + train_count.loc[min_esp]))[0] > ratio:
            test_count.loc[min_esp] += df[df.file == f].groupby('espece').count().iloc[0].file
            test_df = pd.concat([test_df, df[df.file == f]])
        else:
            train_count.loc[min_esp] += df[df.file == f].groupby('espece').count().iloc[0].file
            train_df = pd.concat([train_df, df[df.file == f]])
        df = df[df.file != f]
    print('\nratio', train_count/(test_count + train_count))
    return train_df, test_df
def process_data(args):
    path = args.path_to_data
    directory = args.directory
    df = pd.concat({f: pd.read_csv(os.path.join(path, f), sep=' ', names=['espece', 'x', 'y', 'w', 'h'])
                    for f in tqdm(os.listdir(path))}, names=['file'])
    df = df.reset_index(level=[0])
    df = df.reset_index()
    del df['index']
    df = df[df.espece != 'y']
    df.espece = df.espece.astype(float)
    tab = df.groupby('espece').count()
    tab = tab.sort_values(tab.columns[0], ascending=False)
    compte = pd.DataFrame(np.zeros((len(tab) + 1, 1)), columns=['nombre'])
    return df
def export_split(entry, path, directory):
    val = entry[0]
    train = entry[1]
    create_directory_if_not_exists(os.path.join(directory, 'images'))
    create_directory_if_not_exists(os.path.join(directory, 'labels'))
    if args.test == 1:
        test = entry[2]
        test.file = ['.'.join(x.split('.')[:-1]) for num, x in enumerate(test.file)]
        create_directory_if_not_exists(os.path.join(directory, 'images/test'))
        create_directory_if_not_exists(os.path.join(directory, 'labels/test'))
        copy_files_to_directory(test.file, path, os.path.join(directory, 'labels/test'), 'txt')
        copy_files_to_directory(test.file, os.path.join(path, '../images/all'), os.path.join(directory, 'images/test'), 'jpg')
    val.file = ['.'.join(x.split('.')[:-1]) for num, x in enumerate(val.file)]
    train.file = ['.'.join(x.split('.')[:-1]) for num, x in enumerate(train.file)]
    create_directory_if_not_exists(os.path.join(directory, 'images/train'))
    create_directory_if_not_exists(os.path.join(directory, 'images/val'))
    create_directory_if_not_exists(os.path.join(directory, 'labels/train'))
    create_directory_if_not_exists(os.path.join(directory, 'labels/val'))
    copy_files_to_directory(val.file, path, os.path.join(directory, 'labels/val'), 'txt')
    copy_files_to_directory(val.file, os.path.join(path, '../images/all'), os.path.join(directory, 'images/val'), 'jpg')
    copy_files_to_directory(train.file, path, os.path.join(directory, 'labels/train'), 'txt')
    copy_files_to_directory(train.file, os.path.join(path, '../images/all'), os.path.join(directory, 'images/train'), 'jpg')
    try:
        liste_espece = pd.read_csv(os.path.join(path, '../liste_especes.csv'))
    except Exception:
        print('No species list detected, please add it to', os.path.join(directory, 'custom_data.yaml'))
    with open(os.path.join(directory, 'custom_data.yaml'), 'w') as f:
>>>>>>> 729ca99066972cd2d914d6054b6e8885d8e9c856
        if args.test == 1:
            f.write(f'test: {os.path.join(directory, "images/test")}\n')
        f.write(f'train: {os.path.join(directory, "images/train")}\n')
        f.write(f'val: {os.path.join(directory, "images/val")}\n')
<<<<<<< HEAD
        f.write(f'nc: {len(species_list)}\n')
        f.write(f'names: {species_list.species.tolist()}')
@@ -247,23 +127,3 @@ if __name__ == '__main__':
              'represent 10% of your dataset. To do so, please use the script "get_spectrogram.py"',
              'with the --background argument. Compute on recordings that contain multiple types of noise...')
\ No newline at end of file
=======
        f.write(f'nc: {len(liste_espece)}\n')
        f.write(f'names: {liste_espece.espece.tolist()}')
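        # The generated custom_data.yaml then looks like (paths and names assumed):
        #   train: /data/out/images/train
        #   val: /data/out/images/val
        #   nc: 2
        #   names: ['especeA', 'especeB']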
if __name__ == '__main__':
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='Split the dataset into balanced train/val (and optional test) sets')
    parser.add_argument('-r', '--ratio', type=float, default=0.7, help='Train Ratio (val = 1 - ratio)')
    parser.add_argument('-p', '--path_to_data', type=arg_directory, help='Path of the folder that contains the .txt (ending with labels/)', required=True)
    parser.add_argument('-d', '--directory', type=arg_directory, help='Directory to which spectrogram and .txt files will be stored (different from -p)', required=True)
    parser.add_argument('--test', type=int, help='1 if True. Split into train/test/val. 1 - Ratio / 2 for test and same for validation', default=0)
    args = parser.parse_args()

    df = process_data(args)
    train, val = split(df, args.ratio)
    if args.test == 1:
        val, test = split(val, 0.5)
        export_split([val, train, test], args.path_to_data, args.directory)
    else:
        export_split([val, train], args.path_to_data, args.directory)
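# Command-line usage sketch (script name and paths are assumed):
#   python split_dataset.py -p dataset/labels -d dataset -r 0.7 --test 1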
>>>>>>> 729ca99066972cd2d914d6054b6e8885d8e9c856
@@ -2,50 +2,11 @@
import os
import json
<<<<<<< HEAD
import argparse
from glob import glob
import utils
=======
import base64
import argparse
from glob import glob
from pathlib import Path
def arg_directory(path):
    if os.path.isdir(path):
        return path
    else:
        raise argparse.ArgumentTypeError(f'`{path}` is not a valid path')
def create_directory(directory):
    try:
        os.makedirs(os.path.join(directory, 'labels'), exist_ok=True)
    except Exception as error:
        print(f'`{directory}` could not be created: {error}')
    return directory
def convert_labelme_to_yolo(labelme_annotation_path, yolo_directory):
    # Load LabelMe annotation
    image_id = Path(labelme_annotation_path).stem
    with open(labelme_annotation_path, 'r') as labelme_annotation_file:
        labelme_annotation = json.load(labelme_annotation_file)

    # YOLO annotation and image paths
    yolo_annotation_path = os.path.join(yolo_directory, 'labels', f'{image_id}.txt')
    yolo_image_path = os.path.join(yolo_directory, 'images/all', f'{image_id}.jpg')

    with open(yolo_annotation_path, 'w') as yolo_annotation_file:
        yolo_image_data = base64.b64decode(labelme_annotation['imageData'])
        # Write YOLO image
        with open(yolo_image_path, 'wb') as yolo_image_file:
            yolo_image_file.write(yolo_image_data)
>>>>>>> 729ca99066972cd2d914d6054b6e8885d8e9c856
<<<<<<< HEAD
def main(arguments):
    """
    Launch the processing on each .txt.
@@ -60,32 +21,6 @@ def main(arguments):
        utils.labelme2yolo(labelme_annotation_path, arguments.directory)
        # Add all the information in the .txt file
        with open(labelme_annotation_path, 'r', encoding='utf-8') as labelme_annotation_file:
=======
            label = shape['label']
            # shape['points'] format : [[x1,y1],[x2,y2]...] #
            scale_width = 1.0 / labelme_annotation['imageWidth']
            scale_height = 1.0 / labelme_annotation['imageHeight']
            width = abs(shape['points'][1][0] - shape['points'][0][0]) * scale_width
            height = abs(shape['points'][1][1] - shape['points'][0][1]) * scale_height
            x = min(shape['points'][0][0], shape['points'][1][0]) * scale_width + width / 2
            y = min(shape['points'][0][1], shape['points'][1][1]) * scale_height + height / 2
            if x + width / 2 > 1 or y + height / 2 > 1:
                print(f'Error with bounding box values over 1 in file {yolo_annotation_path}')
            annotation_line = f'{label} {x} {y} {width} {height}\n'
            yolo_annotation_file.write(annotation_line)
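            # Worked example (assumed 640x480 image): a LabelMe box from
            # (100, 120) to (300, 240) gives width = 200/640 = 0.3125,
            # height = 120/480 = 0.25, x = 100/640 + width/2 = 0.3125 and
            # y = 120/480 + height/2 = 0.375, written as
            # "label 0.3125 0.375 0.3125 0.25".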
def main(args):
    yolo_names = set()
    create_directory(os.path.join(args.directory, 'images'))
    create_directory(os.path.join(args.directory, 'images/all'))
    create_directory(os.path.join(args.directory, 'labels'))
    for labelme_annotation_path in glob(f'{args.path_to_data}/*.json'):
        convert_labelme_to_yolo(labelme_annotation_path, args.directory)
        with open(labelme_annotation_path, 'r') as labelme_annotation_file:
            labelme_annotation = json.load(labelme_annotation_file)
            for shape in labelme_annotation['shapes']:
...
# Raven2YOLO requirements
# Usage: pip install -r requirements.txt
# Base ------------------------------------------------------------------------
<<<<<<< HEAD
matplotlib>=3.2.2 # Plotting library
numpy>=1.18.5 # Numerical computing library
opencv-python>=4.1.1 # Computer vision library
Pillow>=7.1.2 # Python Imaging Library
PyYAML>=5.3.1 # YAML parser and emitter
pandas>=2.2.2 # Data manipulation and analysis
tqdm>=4.64.0 # Progress bar library
# argparse and os are Python standard-library modules; they are not pip
# packages and are intentionally omitted from this file
=======
gitpython>=3.1.30
globox==2.4.5
matplotlib>=3.2.2
multiprocess==0.70.16
mycolorpy==1.5.1
numpy>=1.18.5
opencv-python>=4.1.1
Pillow>=7.1.2
psutil # system resources
p-tqdm==1.4.0
PyYAML>=5.3.1
requests>=2.23.0
scipy>=1.4.1
soundfile>=0.11.0
thop>=0.1.1 # FLOPs computation
torch>=1.7.0 # see https://pytorch.org/get-started/locally (recommended)
torchvision>=0.8.1
tqdm>=4.64.0
labelme==5.1.1
librosa==0.9.2
# protobuf<=3.20.1 # https://github.com/ultralytics/yolov5/issues/8012
albumentations>=1.0.3 #data augmentation (optional)
# Logging ---------------------------------------------------------------------
tensorboard>=2.4.1
# clearml>=1.2.0
# comet
>>>>>>> 729ca99066972cd2d914d6054b6e8885d8e9c856
# Plotting --------------------------------------------------------------------
matplotlib>=3.7.1 # Plotting library
seaborn>=0.11.0 # Statistical data visualization
# Extras ----------------------------------------------------------------------
<<<<<<< HEAD
ipdb>=0.13.9 # IPython debugger
ipython>=8.1.1 # Interactive Python shell

# Additional Packages ---------------------------------------------------------
librosa>=0.9.2 # Audio and music processing library
scipy>=1.8.0 # Scientific library for numerical computations
soundfile>=0.11.0 # Sound library
p_tqdm==1.4.0 # Parallel tqdm (progress bar) for Python
torch==1.12.0 # PyTorch deep learning framework
albumentations>=1.0.3 # Image augmentation library
scikit-learn<=1.1.2 # Machine learning library
# glob, shutil, base64, json, datetime and pathlib are standard-library
# modules, `yaml` is installed via PyYAML and `cv2` via opencv-python (both
# listed above), so none of them are valid pip requirements
=======
# ipython # interactive notebook
# mss # screenshots
# pycocotools>=2.0.6 # COCO mAP
# roboflow
# ultralytics # HUB https://hub.ultralytics.com
>>>>>>> 729ca99066972cd2d914d6054b6e8885d8e9c856