Fix multiprocessing error and remove the `-i/--input` argument (input mode is now inferred from `--file`)

58dbf526 · Stephane Chavin · 5cccc9d9 · 58dbf526
Commit 58dbf526 authored 1 year ago by Stephane Chavin
--- a/get_spectrogram.py
+++ b/get_spectrogram.py
@@ -7,6 +7,8 @@ import pandas as pd
 import matplotlib.pyplot as plt
 from p_tqdm import p_map
 import soundfile as sf
+import scipy.signal as signal
+from tqdm import tqdm
 import warnings
 warnings.filterwarnings('ignore')
@@ -42,14 +44,17 @@ def process_recordings(data, img_per_rec, args):
    _, (i) = data
    duration = args.duration
    overlap = args.overlap
-    for count in range(img_per_rec):
-        offset = count * (duration - overlap)
    filename = str(i[0])
    try:
        info = sf.info(filename)
-            _, fs = info.duration, info.samplerate
+        file_duration, fs = info.duration, info.samplerate
+    except Exception as error:
+        print(f'`{filename}` cannot be open... : {error}')
+    for count in range(img_per_rec):
+        offset = count * (duration - overlap)
+        if offset > file_duration:
+            continue
+        try:
            sig, fs = sf.read(filename, start=int(offset*fs), stop=int((offset+duration)*fs), always_2d=True)
            sig = sig[:,0]
            sig = signal_processing(sig, args.sr, fs, args.up, args.low)
@@ -72,8 +77,6 @@ if __name__ == "__main__":
    parser.add_argument('directory', type=arg_directory, help='Directory to which spectrograms will be stored')
    parser.add_argument('-m', '--mode', type=str, choices=['unique', 'multiple'], help='if unique, only 1 image per file', default='multiple')
    parser.add_argument('-n', '--columns_name', type=str, help='Name of the columns that contain the path of the .wav', default='Path')
-    parser.add_argument('-i', '--input', type=str, choices=['file', 'folder'], help='Choose "file" if you have a .csv file or "folder" to export '
-                                                                            'spectrogram from all the .wav of a folder', default='folder')
    parser.add_argument('-f', '--file', type=str, help='Name of the file that contains the recording to print', default='None')
    parser.add_argument('--frames', type=int, help='Number of spectrogram per file', default=30)
    parser.add_argument('--duration', type=int, help='Duration for each spectrogram', default=8)
@@ -94,17 +97,24 @@ if __name__ == "__main__":
    path_to_data = args.path_to_data
-    if args.input == 'file':
+    if args.file != 'None':
+        try : 
            df = pd.read_csv(args.file, low_memory=False)
+        except Exception as error:
+            print('Try to load as pickle...')
+            df = pd.read_pickle(args.file, low_memory=False)
        df['Path'] = df[args.columns_name]
-    elif args.input == 'folder':
-        if args.file != 'None':
-            df = pd.DataFrame(glob.glob(os.path.join(path_to_data, args.file), recursive=True), columns=['Path'])
    else:
        df = pd.DataFrame(glob.glob(os.path.join(path_to_data, '*'), recursive=True), columns=['Path'])   
+    if args.cpu == 1:
+        for num, row in tqdm(df.iterrows(), total = len(df)):
+            process_recordings([num, [row.Path]], img_per_rec, args)
+        final_dest = os.path.join(args.directory,'Spectrogram')
+        print(f'Saved to {final_dest}')
+    else:
        img_per_rec = [img_per_rec]*len(df.groupby('Path'))
        args = [args]*len(df.groupby('Path')) 
        p_map(process_recordings, enumerate(df.groupby('Path')), img_per_rec, args, num_cpus=args[0].cpu, total=len(df.groupby('Path')))
-    print(f'Saved to {args[0].directory}/Spectrogram')
+        final_dest = os.path.join(args[0].directory,'Spectrogram')
+        print(f'Saved to {final_dest}')
\ No newline at end of file