Hello World: R + Fortran + OpenMP

Hello World: R + Fortran + OpenMP
Page content

Why?

I want to fill up a big matrix and I care about speed and to a lesser degree memory efficiency. In practice the matrix will have 4000 rows and K columns where K is the number of observations for which I want to run my predictive model. For this exercise I will keep K to just 500 because my R approach eats a ton of memory. For this simple exercise, I will \(A_{ik} = 1 / (1 + exp(i^2 + i^3 + k^2 + k^3))\) in practice the operation that I need to do is much more complicated which will make the difference is run time even bigger.

R

tic('R approach')
library(furrr)
## Loading required package: future
options(future.globals.maxSize= 2097152000)


r_func_ik <- function(i,k){
  ik = 1 / (1 + exp(i^2 + i^3 + k^2 + k^3))
  return(c(i=i, k=k , ik=ik))
}

fill_matrix_r <- function(N,K,workers){
  nk <- tidyr::crossing(n=1:N, k=1:K)
  eval(parse(text = glue::glue('plan(multiprocess(workers = {workers}))')))
  ik_list <- future_map2(.x = nk$n, .y = nk$k,
                         .f = ~ r_func_ik(i = .x, k = .y),
                         .progress = F)
  y_pred <- matrix(nrow = N, ncol = K)
  for(i in 1:(N*K)){
    y_pred[ik_list[[i]][[1]],ik_list[[i]][[2]]] <- ik_list[[i]][[3]]
  }
  return(y_pred)
  
}

A <- fill_matrix_r(N = N, K = K, workers = 4)
toc()
## R approach: 6.182 sec elapsed

Fortran

All the code for my R package is saved here. This is the Fortran subroutine:

module im_f_module
   use omp_lib
   implicit none
   contains

subroutine fill_matrix(N,K,A,nthreads) bind(C, name="fill_")
   use, intrinsic                                         :: iso_c_binding, only : c_double, c_int
   integer(c_int), intent(in)                             :: N,K, nthreads
   integer                                                :: nn, kk, thread_num
   real(c_double), DIMENSION(N, K), intent(out)	          :: A
    ! Specify number of threads to use:
  	!$ call omp_set_num_threads(nthreads)
  	!$omp parallel do 
  	do nn=1,N
  	   do kk=1,K
  	      CALL do_something(i=DBLE(nn),k=DBLE(kk), ik= A(nn,kk))
  	   end do
  	   !print *, A(nn, :)
  	end do
  	!$omp end parallel do
end subroutine fill_matrix

 
 subroutine do_something(i,k, ik)
    double precision, intent(in)                       :: i, k 
    double precision, intent(out)                      :: ik 
      ik = 1 / (1 + exp(i**2 + i**3 + k**2 + k**3))
 end subroutine do_something



end module im_f_module

This is the R function that calls that subroutine:

#'@export
#'@useDynLib fortranMatrix, .registration = TRUE
fill_my_matrix <- function(N=10, K=5, nthreads=4) {
  (OpenMPController::omp_set_num_threads(nthreads))
  A <- .Fortran("fill",
                N = as.integer(N),
                K = as.integer(K),
                A = matrix(1982,nrow = N, ncol = K),
                nthreads = as.integer(nthreads))
  return(A$A)
}

and finally, this is the Makevars so things compile the right way

#####  Compiler flags  #####
PKG_FCFLAGS = $(SHLIB_OPENMP_FFLAGS)
PKG_LIBS = $(SHLIB_OPENMP_CFLAGS)

#####  Phony target for R's build system to invoke  #####
all: $(SHLIB)

#####  Clean target  #####
clean:
	rm -f *.o *.mod

Calling the Fortran code

library(fortranMatrix)
tic('Fortran approach')
library(fortranMatrix)
B <- fill_my_matrix(N = N, K = K, nthreads = 4)
toc()
## Fortran approach: 0.016 sec elapsed

All equal

all.equal(A, B, tolerance = .Machine$double.eps)
## [1] TRUE

Microbenchmark

library(microbenchmark)

mb <- microbenchmark(R1 = fill_matrix_r(N = N, K = K, workers = 4),
                     F1 = fill_my_matrix(N = N, K = K, nthreads = 4), times = 20L)


ggplot2::autoplot(mb)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.

Relevant links