Skip to content
Snippets Groups Projects
Commit 58dbf526 authored by Stephane Chavin
Browse files

correct multiprocessing error and remove input argument

parent 5cccc9d9
No related branches found
No related tags found
No related merge requests found
...@@ -7,6 +7,8 @@ import pandas as pd ...@@ -7,6 +7,8 @@ import pandas as pd
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from p_tqdm import p_map from p_tqdm import p_map
import soundfile as sf import soundfile as sf
import scipy.signal as signal
from tqdm import tqdm
import warnings import warnings
warnings.filterwarnings('ignore') warnings.filterwarnings('ignore')
...@@ -42,14 +44,17 @@ def process_recordings(data, img_per_rec, args): ...@@ -42,14 +44,17 @@ def process_recordings(data, img_per_rec, args):
_, (i) = data _, (i) = data
duration = args.duration duration = args.duration
overlap = args.overlap overlap = args.overlap
for count in range(img_per_rec):
offset = count * (duration - overlap)
filename = str(i[0]) filename = str(i[0])
try: try:
info = sf.info(filename) info = sf.info(filename)
_, fs = info.duration, info.samplerate file_duration, fs = info.duration, info.samplerate
except Exception as error:
print(f'`{filename}` cannot be open... : {error}')
for count in range(img_per_rec):
offset = count * (duration - overlap)
if offset > file_duration:
continue
try:
sig, fs = sf.read(filename, start=int(offset*fs), stop=int((offset+duration)*fs), always_2d=True) sig, fs = sf.read(filename, start=int(offset*fs), stop=int((offset+duration)*fs), always_2d=True)
sig = sig[:,0] sig = sig[:,0]
sig = signal_processing(sig, args.sr, fs, args.up, args.low) sig = signal_processing(sig, args.sr, fs, args.up, args.low)
...@@ -72,8 +77,6 @@ if __name__ == "__main__": ...@@ -72,8 +77,6 @@ if __name__ == "__main__":
parser.add_argument('directory', type=arg_directory, help='Directory to which spectrograms will be stored') parser.add_argument('directory', type=arg_directory, help='Directory to which spectrograms will be stored')
parser.add_argument('-m', '--mode', type=str, choices=['unique', 'multiple'], help='if unique, only 1 image per file', default='multiple') parser.add_argument('-m', '--mode', type=str, choices=['unique', 'multiple'], help='if unique, only 1 image per file', default='multiple')
parser.add_argument('-n', '--columns_name', type=str, help='Name of the columns that contain the path of the .wav', default='Path') parser.add_argument('-n', '--columns_name', type=str, help='Name of the columns that contain the path of the .wav', default='Path')
parser.add_argument('-i', '--input', type=str, choices=['file', 'folder'], help='Choose "file" if you have a .csv file or "folder" to export '
'spectrogram from all the .wav of a folder', default='folder')
parser.add_argument('-f', '--file', type=str, help='Name of the file that contains the recording to print', default='None') parser.add_argument('-f', '--file', type=str, help='Name of the file that contains the recording to print', default='None')
parser.add_argument('--frames', type=int, help='Number of spectrogram per file', default=30) parser.add_argument('--frames', type=int, help='Number of spectrogram per file', default=30)
parser.add_argument('--duration', type=int, help='Duration for each spectrogram', default=8) parser.add_argument('--duration', type=int, help='Duration for each spectrogram', default=8)
...@@ -94,17 +97,24 @@ if __name__ == "__main__": ...@@ -94,17 +97,24 @@ if __name__ == "__main__":
path_to_data = args.path_to_data path_to_data = args.path_to_data
if args.input == 'file': if args.file != 'None':
try :
df = pd.read_csv(args.file, low_memory=False) df = pd.read_csv(args.file, low_memory=False)
except Exception as error:
print('Try to load as pickle...')
df = pd.read_pickle(args.file, low_memory=False)
df['Path'] = df[args.columns_name] df['Path'] = df[args.columns_name]
elif args.input == 'folder':
if args.file != 'None':
df = pd.DataFrame(glob.glob(os.path.join(path_to_data, args.file), recursive=True), columns=['Path'])
else: else:
df = pd.DataFrame(glob.glob(os.path.join(path_to_data, '*'), recursive=True), columns=['Path']) df = pd.DataFrame(glob.glob(os.path.join(path_to_data, '*'), recursive=True), columns=['Path'])
if args.cpu == 1:
for num, row in tqdm(df.iterrows(), total = len(df)):
process_recordings([num, [row.Path]], img_per_rec, args)
final_dest = os.path.join(args.directory,'Spectrogram')
print(f'Saved to {final_dest}')
else:
img_per_rec = [img_per_rec]*len(df.groupby('Path')) img_per_rec = [img_per_rec]*len(df.groupby('Path'))
args = [args]*len(df.groupby('Path')) args = [args]*len(df.groupby('Path'))
p_map(process_recordings, enumerate(df.groupby('Path')), img_per_rec, args, num_cpus=args[0].cpu, total=len(df.groupby('Path'))) p_map(process_recordings, enumerate(df.groupby('Path')), img_per_rec, args, num_cpus=args[0].cpu, total=len(df.groupby('Path')))
print(f'Saved to {args[0].directory}/Spectrogram') final_dest = os.path.join(args[0].directory,'Spectrogram')
print(f'Saved to {final_dest}')
\ No newline at end of file
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment