Skip to content
Snippets Groups Projects
Select Git revision
  • 6d6ecda4fa8125b20d2d5635d4d7b0b87a8bf042
  • master default protected
2 results

conllulib.py

Blame
  • get_spectrogram.py 2.86 KiB
    import os
    import pandas as pd
    import librosa
    import numpy as np
    import matplotlib.pyplot as plt
    from tqdm import tqdm
    import ipdb
    import argparse
    from p_tqdm import p_map
    import warnings
    warnings.filterwarnings('ignore')
    
    def arg_directory(path):
        """Validate *path* as an existing directory (argparse ``type`` callback).

        Returns *path* unchanged when ``os.path.isdir`` accepts it; otherwise
        raises ``argparse.ArgumentTypeError`` so argparse reports a usage error.
        """
        if not os.path.isdir(path):
            raise argparse.ArgumentTypeError(f'`{path}` is not a valid path')
        return path
    
    # Command-line interface. All five options are required; the two directory
    # options are validated by `arg_directory` at parse time.
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='TODO')
    parser.add_argument('-f','--file', type=str,required=True,help = 'Name of the file that contain the recording to print')
    parser.add_argument('-p','--path_to_data', type=arg_directory, help = 'Path of the folder that contain the recordings',required=True)
    parser.add_argument('-d','--direction', type=arg_directory, help = 'Directory to wich spectrogram will be stored',required=True)
    parser.add_argument('-m','--mode', type=str,choices=['unique','multiple'], help = "Number of spectrograms per recording: 'unique' saves 1 image, 'multiple' saves 10",required=True)
    parser.add_argument('-n','--columns_name', type=str, help = 'Name of the columns that contain the path of the .wav',required=True)
    args = parser.parse_args()

    path_to_data = args.path_to_data
    direction = args.direction
    # Sub-folder of `direction` that receives the generated images.
    folder = 'Spectrogram/'
    # Length of audio loaded for each image (passed to librosa.load as `duration`).
    DURATION = 5
    # Amount by which consecutive windows overlap: window k starts at
    # k * (DURATION - OVERLAP).
    OVERLAP = 2
    # `choices` restricts --mode to these two values, so a single expression
    # is enough and the constant is always defined.
    NB_IMG_PER_REC = 10 if args.mode == 'multiple' else 1

    # --file is read as a CSV; the user-named column is copied to 'Path' so the
    # rest of the script can rely on a fixed column name.
    df = pd.read_csv(args.file,low_memory=False)
    df['Path'] = df[args.columns_name]
    
    def process(x):
        """Save the spectrogram image(s) of one recording as JPEG files.

        Parameters
        ----------
        x : tuple
            One item of ``enumerate(df.groupby('Path'))``, i.e.
            ``(index, (path, rows))``. Only ``path`` (the group key) is used.

        Notes
        -----
        Up to ``NB_IMG_PER_REC`` windows of length ``DURATION`` are read from the
        file; window ``k`` starts at ``k * (DURATION - OVERLAP)``. Each window is
        written to ``<direction>/<folder>/<flattened path>_<k>.jpg``. When a
        window cannot be loaded the file name is printed and that window is
        skipped.
        """
        _, (path, _rows) = x
        filename = str(path)

        # Flatten the path into a file-name stem. Only the real extension is
        # removed, so a leading './' or dots in directory names no longer
        # truncate the stem (which made different recordings overwrite each
        # other's images).
        stem = os.path.splitext(filename.replace('/', '_'))[0]

        # Create the output folder up front. exist_ok avoids the crash that
        # occurred when several parallel workers tried to create it at once.
        out_dir = os.path.join(direction, folder)
        os.makedirs(out_dir, exist_ok=True)

        window_size = 1024
        window = np.hanning(window_size)

        for count in range(NB_IMG_PER_REC):
            offset = count * (DURATION - OVERLAP)
            try:
                y, _sr = librosa.load(filename, offset=offset, duration=DURATION)
            except Exception:
                # Best effort: report the unreadable file and try the next window.
                print(filename)
                continue
            if y.size == 0:
                # The offset is past the end of the recording; later windows
                # start even further, so there is nothing left to draw.
                break

            stft = librosa.stft(y, n_fft=window_size, hop_length=512, window=window)

            # Log-magnitude, floored at the smallest positive float so that
            # all-zero bins yield a finite value instead of -inf (a single
            # -inf would turn the mean used for vmin into -inf).
            magnitude = np.abs(stft)
            log_spec = np.log10(np.maximum(magnitude, np.finfo(magnitude.dtype).tiny))

            fig = plt.figure()
            try:
                # Rows are flipped so that, with matplotlib's default image
                # origin, the first STFT row ends up at the bottom. The colour
                # scale runs from the mean to the max of the log-magnitude.
                plt.imshow(np.flipud(log_spec), aspect="auto", interpolation=None,
                           cmap='jet', vmin=log_spec.mean(), vmax=log_spec.max())
                # Let the image fill the whole figure (no margins).
                plt.subplots_adjust(top=1, bottom=0, left=0, right=1)
                fig.savefig(os.path.join(out_dir, stem + '_' + str(count) + '.jpg'))
            finally:
                # Always release the figure, including the last one of the call.
                plt.close(fig)
            
    
    # Guarded so that worker processes started with the "spawn" method, which
    # re-import this module, do not launch the pool again.
    if __name__ == '__main__':
        # One task per distinct value of the 'Path' column (one per recording).
        p_map(process, enumerate(df.groupby('Path')), num_cpus=10)
        # os.path.join gives the real output folder even when `direction` has
        # no trailing slash.
        print('saved to ', os.path.join(direction, folder))