Difference between revisions of "R MPI Example"

From UFRC
Jump to navigation Jump to search
Line 69: Line 69:
 
# where X is one less than the total number of MPI ranks
 
# where X is one less than the total number of MPI ranks
  
srun --mpi=pmi2 Rscript /ufrc/data/training/SLURM/prime/rmpi_test.R
+
srun --mpi=pmi2 Rscript /data/training/SLURM/prime/rmpi_test.R
  
 
date
 
date

Revision as of 19:28, 29 September 2020

back to the main R page

Example, of using the parallel module to run MPI jobs under SLURM with Rmpi library.

# Load the R MPI package if it is not already loaded.
if (!is.loaded("mpi_initialize")) {
    library("Rmpi")
    }

ns <- mpi.universe.size() - 1
mpi.spawn.Rslaves(nslaves=ns)
#
# In case R exits unexpectedly, have it automatically clean up
# resources taken up by Rmpi (slaves, memory, etc...)
.Last <- function(){
       if (is.loaded("mpi_initialize")){
           if (mpi.comm.size(1) > 0){
               print("Please use mpi.close.Rslaves() to close slaves.")
               mpi.close.Rslaves()
           }
           print("Please use mpi.quit() to quit R")
           .Call("mpi_finalize")
       }
}
# Tell all slaves to return a message identifying themselves
mpi.bcast.cmd( id <- mpi.comm.rank() )
mpi.bcast.cmd( ns <- mpi.comm.size() )
mpi.bcast.cmd( host <- mpi.get.processor.name() )
mpi.remote.exec(paste("I am",mpi.comm.rank(),"of",mpi.comm.size()))

# Test computations
x <- 5
x <- mpi.remote.exec(rnorm, x)
length(x)
x

# Tell all slaves to close down, and exit the program
mpi.close.Rslaves(dellog = FALSE)
mpi.quit()


Example job script using rmpi_test.R script.

#!/bin/sh
#SBATCH --job-name=mpi_job_test # Job name
#SBATCH --mail-type=END,FAIL # Mail events (NONE, BEGIN, END, FAIL, ALL)
#SBATCH --mail-user=ENTER_YOUR_EMAIL_HERE # Where to send mail	
#SBATCH --cpus-per-task=1 # Number of cores per MPI rank 
#SBATCH --nodes=2 #Number of nodes
#SBATCH --ntasks=8 # Number of MPI ranks
#SBATCH --ntasks-per-node=4 #How many tasks on each node
#SBATCH --ntasks-per-socket=2 #How many tasks on each CPU or socket
#SBATCH --distribution=cyclic:cyclic #Distribute tasks cyclically on nodes and sockets
#SBATCH --mem-per-cpu=1gb # Memory per processor
#SBATCH --time=00:05:00 # Time limit hrs:min:sec
#SBATCH --output=mpi_test_%j.out # Standard output and error log
pwd; hostname; date

echo "Running example Rmpi script. Using $SLURM_JOB_NUM_NODES nodes with $SLURM_NTASKS 
tasks, each with $SLURM_CPUS_PER_TASK cores."
module purge; module load intel/2018 openmpi/3.1.2 Rmpi/3.5.1

# Use '-np 1' since Rmpi does its own task management
# Make sure the mpi.spawn.Rslaves(nslaves=X) code spawns X slaves 
# where X is one less than the total number of MPI ranks

srun --mpi=pmi2 Rscript /data/training/SLURM/prime/rmpi_test.R

date

For rmpi/4.0 module the following command will work

mpiexec -n ${SLURM_NTASKS} Rscript rmpi_test.R


Example .Rprofile configuration file that must be placed in the working directory if the rmpi module doesn't add a symlink automatically.


# Example .Rprofile config, put in the working directory. 
# Unix files that start with a "." are hidden. "ls -a" will show hidden files.

# This R profile can be used when a cluster does not allow spawning or a
# job scheduler is required to launch any parallel jobs. Saving this
# file as .Rprofile in the working directory or root directory. For unix
# platform, run mpirexec -n [cpu numbers] R --no-save -q For windows
# platform with mpich2, use mpiexec wrapper and specify a working
# directory where .Rprofile is inside. Cannot be used as Rprofile.site
# because it will not work Following system libraries are not loaded
# automatically. So manual loads are needed.

library(utils)
library(stats)
library(datasets)
library(grDevices)
library(graphics)
library(methods)

if (!invisible(library(Rmpi,logical.return = TRUE))){
    warning("Rmpi cannot be loaded")
    q(save = "no")
}

options(error=quote(assign(".mpi.err", FALSE, env = .GlobalEnv)))

if (mpi.comm.size(0) > 1)
    invisible(mpi.comm.dup(0,1))

if (mpi.comm.rank(0) > 0){
    options(echo=FALSE)
    .comm <- 1
    mpi.barrier(0)
    repeat
    try(eval(mpi.bcast.cmd(rank=0,comm=.comm)),TRUE)
    if (is.loaded("mpi_comm_disconnect"))
        mpi.comm.disconnect(.comm)
    else mpi.comm.free(.comm)
        mpi.quit()
}

if (mpi.comm.rank(0)==0) {
    options(echo=TRUE)
    mpi.barrier(0)
    if(mpi.comm.size(0) > 1)
        slave.hostinfo(1)
}

.Last <- function(){
    if (is.loaded("mpi_initialize")){
        if (mpi.comm.size(1) > 1){
            print("Please use mpi.close.Rslaves() to close slaves")
            mpi.close.Rslaves(comm=1)
        }
    }
    print("Please use mpi.quit() to quit R")
    mpi.quit()
}