
Batch scripts

Below is a list of the different types of parallelism and the naming conventions we will use throughout this documentation:

Type of parallelism             Naming convention    Notes
Single-Process Single-Thread    Serial               Classic serial job
Single-Process Multi-Thread     Multithreaded        OpenMP or pthreads
Multi-Process Single-Thread     MPI                  Pure MPI (one thread per rank)
Multi-Process Multi-Thread      Hybrid MPI+OpenMP    Standard hybrid model

We will now illustrate how each of these types is expressed in a SLURM batch script.

Example

Serial job with 1200GB of memory per core, running for 4 days and 12 hours on the large partition:

#!/bin/bash

#SBATCH --job-name=serial_job
#SBATCH --output=%j_%x.out
#SBATCH --partition=large
#SBATCH --ntasks=1
#SBATCH --mem-per-cpu=1200G
#SBATCH --time=4-12:00:00
#SBATCH --account=my_project_name

echo "----------------- Environment ------------------"
module purge
module load foss/2022a
module list

echo "------------------- Job info -------------------"
echo "job_id             : $SLURM_JOB_ID"
echo "jobname            : $SLURM_JOB_NAME"
echo "queue              : $SLURM_JOB_PARTITION"
echo "qos                : $SLURM_JOB_QOS"
echo "account            : $SLURM_JOB_ACCOUNT"
echo "submit dir         : $SLURM_SUBMIT_DIR"
echo "number of mpi tasks: $SLURM_NTASKS tasks"
echo "OMP_NUM_THREADS    : $OMP_NUM_THREADS"

echo "------------------- Node list ------------------"
echo $SLURM_JOB_NODELIST

echo "---------------- Checking limits ---------------"
ulimit -a

echo "--------------- Running the code ---------------"

echo -n "This run started on: "
date

./runner.serial

echo -n "This run completed on: "
date
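
A script like this is saved to a file and submitted with sbatch; the job can then be monitored with squeue and cancelled with scancel. A minimal usage sketch (the file name serial.sh is only an example):

sbatch serial.sh
squeue -u $USER        # list your queued and running jobs
scancel <jobid>        # cancel a job if needed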

SMP/OpenMP job with 64 threads and 60GB of memory in total, running for 12 hours on the batch partition:

#!/bin/bash

# ------------------------------------------------------------------------------
# Slurm directives
# ------------------------------------------------------------------------------

#SBATCH --job-name=openmp_job
#SBATCH --output=%j_%x.out
#SBATCH --partition=batch
#SBATCH --ntasks=1
#SBATCH --mem=60G
#SBATCH --cpus-per-task=64
#SBATCH --time=12:00:00
#SBATCH --account=my_project_name

# ------------------------------------------------------------------------------
# Setting up the environment
# ------------------------------------------------------------------------------

echo "----------------- Environment ------------------"
module purge
module load foss/2022a
module list

export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK

# Executable reported and launched below
EXEC=./runner.omp

# ------------------------------------------------------------------------------
# Printing some information
# ------------------------------------------------------------------------------

echo "------------------- Job info -------------------"
echo "job_id             : $SLURM_JOB_ID"
echo "jobname            : $SLURM_JOB_NAME"
echo "queue              : $SLURM_JOB_PARTITION"
echo "qos                : $SLURM_JOB_QOS"
echo "account            : $SLURM_JOB_ACCOUNT"
echo "submit dir         : $SLURM_SUBMIT_DIR"
echo "number of mpi tasks: $SLURM_NTASKS tasks"
echo "OMP_NUM_THREADS    : $OMP_NUM_THREADS"
echo "Executable         : $EXEC"

echo "------------------- Node list ------------------"
echo $SLURM_JOB_NODELIST

echo "---------------- Checking limits ---------------"
ulimit -a

# ------------------------------------------------------------------------------
# And finally running the code
# ------------------------------------------------------------------------------

echo "--------------- Running the code ---------------"

echo -n "This run started on: "
date

$EXEC

echo -n "This run completed on: "
date
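
Thread placement can also matter for OpenMP performance. If you want to pin threads, the standard OpenMP environment variables can be added to the environment section above; this is an optional, system-dependent tuning hint rather than something required by the script:

export OMP_PROC_BIND=close   # keep threads close to their parent
export OMP_PLACES=cores      # one place per physical core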

Pure MPI job (one thread per rank) with 1024 tasks and 1920MB of memory per core, running for 24 hours on the batch partition:

#!/bin/bash

# ------------------------------------------------------------------------------
# Slurm directives
# ------------------------------------------------------------------------------

#SBATCH --job-name=mpi_job
#SBATCH --output=%j_%x.out
#SBATCH --partition=batch
#SBATCH --ntasks=1024
#SBATCH --mem-per-cpu=1920M
#SBATCH --time=24:00:00
#SBATCH --account=my_project_name

# ------------------------------------------------------------------------------
# Setting up the environment
# ------------------------------------------------------------------------------

echo "----------------- Environment ------------------"
module purge
module load PrgEnv-cray
module list

# Executable reported and launched below
EXEC=./runner.mpi

# ------------------------------------------------------------------------------
# Printing some information
# ------------------------------------------------------------------------------

echo "------------------- Job info -------------------"
echo "job_id             : $SLURM_JOB_ID"
echo "jobname            : $SLURM_JOB_NAME"
echo "queue              : $SLURM_JOB_PARTITION"
echo "qos                : $SLURM_JOB_QOS"
echo "account            : $SLURM_JOB_ACCOUNT"
echo "submit dir         : $SLURM_SUBMIT_DIR"
echo "number of mpi tasks: $SLURM_NTASKS tasks"
echo "OMP_NUM_THREADS    : $OMP_NUM_THREADS"
echo "Executable         : $EXEC"

echo "------------------- Node list ------------------"
echo $SLURM_JOB_NODELIST

echo "---------------- Checking limits ---------------"
ulimit -a

# ------------------------------------------------------------------------------
# And finally running the code
# ------------------------------------------------------------------------------

echo "--------------- Running the code ---------------"

echo -n "This run started on: "
date

srun $EXEC

echo -n "This run completed on: "
date
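
With only --ntasks specified, Slurm is free to decide how the 1024 tasks are spread over nodes. If you prefer an explicit layout and know the number of cores per node, the same request can be expressed per node instead; the figure of 128 cores per node below is an assumption used for illustration only:

#SBATCH --nodes=8
#SBATCH --ntasks-per-node=128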

Hybrid MPI+OpenMP job (multiple threads per MPI process) with 256 MPI tasks, 8 OpenMP threads per task and 1920MB of memory per core, running for 12 hours on the batch partition:

#!/bin/bash

# ------------------------------------------------------------------------------
# Slurm directives
# ------------------------------------------------------------------------------

#SBATCH --job-name=hybrid_job
#SBATCH --output=%j_%x.out
#SBATCH --partition=batch
#SBATCH --ntasks=256
#SBATCH --cpus-per-task=8
#SBATCH --mem-per-cpu=1920M
#SBATCH --time=12:00:00
#SBATCH --account=my_project_name

# ------------------------------------------------------------------------------
# Setting up the environment
# ------------------------------------------------------------------------------

echo "----------------- Environment ------------------"
module purge
module load PrgEnv-cray
module list

export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK

# Executable reported and launched below
EXEC=./runner.hybrid

# ------------------------------------------------------------------------------
# Printing some information
# ------------------------------------------------------------------------------

echo "------------------- Job info -------------------"
echo "job_id             : $SLURM_JOB_ID"
echo "jobname            : $SLURM_JOB_NAME"
echo "queue              : $SLURM_JOB_PARTITION"
echo "qos                : $SLURM_JOB_QOS"
echo "account            : $SLURM_JOB_ACCOUNT"
echo "submit dir         : $SLURM_SUBMIT_DIR"
echo "number of mpi tasks: $SLURM_NTASKS tasks"
echo "OMP_NUM_THREADS    : $OMP_NUM_THREADS"
echo "Executable         : $EXEC"

echo "------------------- Node list ------------------"
echo $SLURM_JOB_NODELIST

echo "---------------- Checking limits ---------------"
ulimit -a

# ------------------------------------------------------------------------------
# And finally running the code
# ------------------------------------------------------------------------------

echo "--------------- Running the code ---------------"

echo -n "This run started on: "
date

srun $EXEC

echo -n "This run completed on: "
date
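
Note that the way srun handles --cpus-per-task changed around Slurm 22.05: on newer versions the value set with #SBATCH is no longer inherited automatically by srun. Depending on the Slurm version installed on your system, you may therefore need to pass it explicitly:

srun --cpus-per-task=$SLURM_CPUS_PER_TASK $EXEC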

Interactive jobs can also be requested directly from the command line:

Example
srun -p batch -A my_project_name -N 1 -n 16 --mem-per-cpu=1024M -t 60 --pty bash
salloc -p batch -A my_project_name -N 2 -n 256 --mem=241G -t 2:00:00
# Once the resources are allocated, use srun in the same way as in submission scripts:
srun ./runner.mpi

Using GPUs

GPU job with 4 tasks and 4 GPUs on a single node, 240GB of memory in total, running for 10 hours on the gpu partition:

#!/bin/bash

# ------------------------------------------------------------------------------
# Slurm directives
# ------------------------------------------------------------------------------

#SBATCH --job-name=gpu_job
#SBATCH --output=%j_%x.out
#SBATCH --partition=gpu
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=4
#SBATCH --mem=240G
#SBATCH --gpus=4
#SBATCH --time=10:00:00
#SBATCH --account=my_project_name

# ------------------------------------------------------------------------------
# Setting up the environment
# ------------------------------------------------------------------------------

echo "----------------- Environment ------------------"
module purge
module load CUDA/11.7.0
module list

# Executable reported and launched below
EXEC=./runner.cuda

# ------------------------------------------------------------------------------
# Printing some information
# ------------------------------------------------------------------------------

echo "------------------- Job info -------------------"
echo "job_id             : $SLURM_JOB_ID"
echo "jobname            : $SLURM_JOB_NAME"
echo "queue              : $SLURM_JOB_PARTITION"
echo "qos                : $SLURM_JOB_QOS"
echo "account            : $SLURM_JOB_ACCOUNT"
echo "submit dir         : $SLURM_SUBMIT_DIR"
echo "number of mpi tasks: $SLURM_NTASKS tasks"
echo "OMP_NUM_THREADS    : $OMP_NUM_THREADS"
echo "number of gpus     : $SLURM_GPUS_ON_NODE"
echo "Executable         : $EXEC"

echo "------------------- Node list ------------------"
echo $SLURM_JOB_NODELIST

echo "---------------- Checking limits ---------------"
ulimit -a

# ------------------------------------------------------------------------------
# And finally running the code
# ------------------------------------------------------------------------------

echo "--------------- Running the code ---------------"

echo -n "This run started on: "
date

srun $EXEC

echo -n "This run completed on: "
date
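
If the application expects each task to use its own GPU, the four GPUs can also be requested per task instead of per job (replacing the --gpus=4 line above); whether this is appropriate depends on how the application selects its GPU:

#SBATCH --gpus-per-task=1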

Running many similar jobs with small variations (e.g. different input files or conditions) using a job array:

#!/bin/bash

# ------------------------------------------------------------------------------
# Slurm directives
# ------------------------------------------------------------------------------

#SBATCH --job-name=array_job
#SBATCH --output=%A-%a_%x.out
#SBATCH --partition=batch
#SBATCH --ntasks=1
#SBATCH --mem-per-cpu=1G
#SBATCH --time=1:00:00
#SBATCH --array=0-19
#SBATCH --account=my_project_name

# ------------------------------------------------------------------------------
# Setting up the environment
# ------------------------------------------------------------------------------

echo "----------------- Environment ------------------"
module purge
module load foss/2022a
module list

# Executable reported and launched below
EXEC=./runner

# ------------------------------------------------------------------------------
# Printing some information
# ------------------------------------------------------------------------------

echo "------------------- Job info -------------------"
echo "job_id             : $SLURM_JOB_ID"
echo "jobname            : $SLURM_JOB_NAME"
echo "queue              : $SLURM_JOB_PARTITION"
echo "qos                : $SLURM_JOB_QOS"
echo "account            : $SLURM_JOB_ACCOUNT"
echo "submit dir         : $SLURM_SUBMIT_DIR"
echo "number of mpi tasks: $SLURM_NTASKS tasks"
echo "OMP_NUM_THREADS    : $OMP_NUM_THREADS"
echo "Executable         : $EXEC"

echo "------------------- Node list ------------------"
echo $SLURM_JOB_NODELIST

echo "---------------- Checking limits ---------------"
ulimit -a

# ------------------------------------------------------------------------------
# And finally running the code
# ------------------------------------------------------------------------------

echo "--------------- Running the code ---------------"

echo -n "This run started on: "
date

srun $EXEC $SLURM_ARRAY_TASK_ID

echo -n "This run completed on: "
date
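
A common pattern is to map the array index to an input file and pass that file to the program instead of the raw index. A minimal sketch, assuming a plain-text file input_list.txt with one input file name per line (the list file and its layout are illustrative):

INPUT=$(sed -n "$((SLURM_ARRAY_TASK_ID + 1))p" input_list.txt)
srun $EXEC "$INPUT"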

Running (many) independent processes inside a job

#!/bin/bash

#SBATCH --job-name=packed_job
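
One way to pack several independent processes into a single allocation is to launch each of them as a one-task job step in the background and wait for all of them to finish. The sketch below illustrates the idea; the executable name, the task count, the resource figures and the --exact flag (available in recent Slurm versions) are assumptions, not site recommendations:

#!/bin/bash

#SBATCH --job-name=packed_job
#SBATCH --output=%j_%x.out
#SBATCH --partition=batch
#SBATCH --ntasks=4
#SBATCH --mem-per-cpu=1G
#SBATCH --time=1:00:00
#SBATCH --account=my_project_name

# Launch one single-task step per input, all in the background
for i in 1 2 3 4; do
    srun --ntasks=1 --exact ./runner.serial input_$i &
done

# Wait for all background steps before the job ends
wait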

Requesting heterogeneous resources within the same job (e.g. 1 CPU with 100GB of memory + 64 CPUs with 2GB each)

#!/bin/bash

#SBATCH --job-name=heterogen_job
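
In recent Slurm versions, a heterogeneous job is written as several components separated by an #SBATCH hetjob line, and each component can then be addressed from srun with --het-group. The sketch below mirrors the example in the heading (1 CPU with 100GB of memory plus 64 CPUs with 2GB each); the executables and the way the components are used are assumptions:

#!/bin/bash

#SBATCH --job-name=heterogen_job
#SBATCH --output=%j_%x.out
#SBATCH --partition=batch
#SBATCH --account=my_project_name
#SBATCH --time=1:00:00
# Component 0: 1 CPU with 100GB of memory
#SBATCH --ntasks=1
#SBATCH --mem=100G

#SBATCH hetjob

# Component 1: 64 CPUs with 2GB each
#SBATCH --ntasks=64
#SBATCH --mem-per-cpu=2G

# Run one program per component and wait for both (program names are illustrative)
srun --het-group=0 ./runner.controller &
srun --het-group=1 ./runner.workers &
wait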

Running several different programs in the same job

#!/bin/bash

#SBATCH --job-name=cosim_job
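
Several different programs can also be started inside one job with srun's multiple-program mode, where a plain-text configuration file maps task ranks to executables. A minimal sketch; the file name cosim.conf, the executables and the rank split are assumptions:

#!/bin/bash

#SBATCH --job-name=cosim_job
#SBATCH --output=%j_%x.out
#SBATCH --partition=batch
#SBATCH --ntasks=8
#SBATCH --mem-per-cpu=1G
#SBATCH --time=1:00:00
#SBATCH --account=my_project_name

# cosim.conf maps task ranks to programs, e.g.:
#   0-3  ./solver_a
#   4-7  ./solver_b
srun --multi-prog cosim.conf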