yafe_slurm.py 5.68 KB
Newer Older
valentin.emiya's avatar
valentin.emiya committed
1
# -*- coding: utf-8 -*-
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# ######### COPYRIGHT #########
# Credits
# #######
#
# Copyright(c) 2020-2020
# ----------------------
#
# * Laboratoire d'Informatique et Systèmes <http://www.lis-lab.fr/>
# * Université d'Aix-Marseille <http://www.univ-amu.fr/>
# * Centre National de la Recherche Scientifique <http://www.cnrs.fr/>
# * Université de Toulon <http://www.univ-tln.fr/>
#
# Contributors
# ------------
#
# * `Valentin Emiya <mailto:valentin.emiya@lis-lab.fr>`_
# * `Ama Marina Krémé <mailto:ama-marina.kreme@lis-lab.fr>`_
#
# This package has been created thanks to the joint work with Florent Jaillet
# and Ronan Hamon on other packages.
#
# Description
# -----------
#
# Time frequency fading using Gabor multipliers
#
# Version
# -------
#
# * tffpy version = 0.1.4
#
# Licence
# -------
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# ######### COPYRIGHT #########
valentin.emiya's avatar
valentin.emiya committed
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
"""Generation of Slurm job-array scripts to run the tasks of an experiment.

.. moduleauthor:: Valentin Emiya
"""

import importlib
import os
from pathlib import Path
import stat
import sys


def _write_executable_script(path, content):
    """Write ``content`` to ``path`` and add the owner-execute permission."""
    with path.open('w') as file:
        file.write(content)
    os.chmod(str(path), os.stat(str(path)).st_mode | stat.S_IXUSR)


def generate_slurm_script(script_file_path, xp_var_name, task_ids=None,
                          n_simultaneous_jobs=10, slurm_walltime='02:00:00',
                          activate_env_command=None, use_gpu=False):
    """Generate the scripts to launch an experiment using Slurm.

    Each task is run as one element of a Slurm job array. A master script
    ``script_slurm.sh`` describing the job array and one script
    ``run_<task_id>.sh`` per task are written in the sub-folder
    ``job_scripts`` of the experiment folder (any ``*.sh`` file already
    present in that sub-folder is deleted first). Slurm outputs are
    redirected to the sub-folder ``logs`` of the experiment folder. The
    command to submit the job array with Slurm is displayed in the terminal.

    An example of a similar usage in the case of OAR (script
    :func:`yafe.utils.generate_oar_script`) is illustrated in the
    :ref:`tutorial <tutorial_oar>`.

    Parameters
    ----------
    script_file_path : str
        File path to the script that defines the experiment.
    xp_var_name : str
        Name of the variable containing the experiment in the script.
    task_ids : list
        List of tasks ids to run.
        If ``task_ids`` is ``None``, the list of pending tasks of the
        experiment is used.
    n_simultaneous_jobs : int
        Maximum number of elements of the job array that Slurm may run
        simultaneously (value placed after ``%`` in the ``--array`` option).
    slurm_walltime : str
        Wall time for each Slurm job ('HH:MM:SS').
    activate_env_command : str or None
        Optional command that must be run to activate a Python virtual
        environment before launching the experiment.
        Typically, this is a command of the form
        ``source some_virtual_env/bin/activate`` when using virtualenv and
        ``source activate some_conda_env`` when using conda.
        If ``activate_env_command`` is ``None``, no virtual environment is
        activated.
    use_gpu : bool
        Flag specifying if a gpu ressource is needed when running the
        experiment. This has not been implemented yet: the value is
        currently ignored.
    """
    script_file_path = Path(script_file_path)
    # Folder that contains the experiment script. It must stay distinct from
    # the folder of the generated job scripts since it is also needed below
    # to make the experiment module importable from the generated scripts.
    xp_module_dir = script_file_path.parent
    script_name = script_file_path.stem

    # Import the experiment from the user script
    if str(xp_module_dir) not in sys.path:
        sys.path.append(str(xp_module_dir))
    mod = importlib.import_module(script_name)
    xp = getattr(mod, xp_var_name)

    # Prepare a clean folder for the generated scripts
    job_dir = xp.xp_path / 'job_scripts'
    job_dir.mkdir(parents=True, exist_ok=True)
    for old_script in job_dir.glob('*.sh'):
        old_script.unlink()

    if task_ids is None:
        task_ids = xp.get_pending_task_ids()
    # The ids are traversed twice (array specification and per-task
    # scripts), so a one-shot iterable must be materialized first.
    task_ids = list(task_ids)

    log_dir = xp.xp_path / 'logs'
    log_dir.mkdir(parents=True, exist_ok=True)

    # Generate the master script describing the job array
    script = '#!/bin/sh\n'
    script += '#SBATCH --job-name={}\n'.format(xp.name)
    script += '#SBATCH --array={}%{}\n'.format(
        ','.join(str(i) for i in task_ids), n_simultaneous_jobs)
    script += '#SBATCH --output={}/stdout_%A_%a.slurm\n'.format(log_dir)
    script += '#SBATCH --error={}/stderr_%A_%a.slurm\n'.format(log_dir)
    script += '#SBATCH --time={}\n'.format(slurm_walltime)
    # TODO: request a GPU resource when ``use_gpu`` is True (not implemented)
    script += 'srun -N1 -n1 {}/run_$SLURM_ARRAY_TASK_ID.sh'.format(job_dir)

    script_path = job_dir / 'script_slurm.sh'
    _write_executable_script(script_path, script)

    # Generate one script per element of the job array
    for idt in task_ids:
        script = '#!/bin/sh\n'

        # activate the virtual env
        if activate_env_command:
            script += '{}\n'.format(activate_env_command)

        # python command: the folder of the experiment script (not the
        # folder of the job scripts) is added to the path so that the
        # experiment module can be imported.
        script += 'echo "Running {}.launch_experiment(task_ids=[{}])"\n'\
            .format(xp_var_name, idt)
        script += 'python -c "import sys; sys.path.append(\'{0}\'); ' \
            'from {1} import {2}; ' \
            '{2}.launch_experiment(task_ids=[{3}])"\n'.format(
                xp_module_dir, script_name, xp_var_name, idt)
        script += 'exit $?'

        _write_executable_script(job_dir / 'run_{}.sh'.format(idt), script)

    print('*' * 80)
    print('Submit the job array using:')
    print('sbatch {}'.format(str(script_path)))
    print('*' * 80)