Hello World: R + Fortran + OpenMP
Page content
Why?
I want to fill up a big matrix and I care about speed and to a lesser degree memory efficiency. In practice the matrix will have 4000 rows and K columns where K is the number of observations for which I want to run my predictive model. For this exercise I will keep K to just 500 because my R approach eats a ton of memory. For this simple exercise, I will \(A_{ik} = 1 / (1 + exp(i^2 + i^3 + k^2 + k^3))\)
in practice the operation that I need to do is much more complicated which will make the difference is run time even bigger.
R
tic('R approach')
library(furrr)
## Loading required package: future
options(future.globals.maxSize= 2097152000)
r_func_ik <- function(i,k){
ik = 1 / (1 + exp(i^2 + i^3 + k^2 + k^3))
return(c(i=i, k=k , ik=ik))
}
fill_matrix_r <- function(N,K,workers){
nk <- tidyr::crossing(n=1:N, k=1:K)
eval(parse(text = glue::glue('plan(multiprocess(workers = {workers}))')))
ik_list <- future_map2(.x = nk$n, .y = nk$k,
.f = ~ r_func_ik(i = .x, k = .y),
.progress = F)
y_pred <- matrix(nrow = N, ncol = K)
for(i in 1:(N*K)){
y_pred[ik_list[[i]][[1]],ik_list[[i]][[2]]] <- ik_list[[i]][[3]]
}
return(y_pred)
}
A <- fill_matrix_r(N = N, K = K, workers = 4)
toc()
## R approach: 6.182 sec elapsed
Fortran
All the code for my R package is saved here. This is the Fortran subroutine:
module im_f_module
use omp_lib
implicit none
contains
subroutine fill_matrix(N,K,A,nthreads) bind(C, name="fill_")
use, intrinsic :: iso_c_binding, only : c_double, c_int
integer(c_int), intent(in) :: N,K, nthreads
integer :: nn, kk, thread_num
real(c_double), DIMENSION(N, K), intent(out) :: A
! Specify number of threads to use:
!$ call omp_set_num_threads(nthreads)
!$omp parallel do
do nn=1,N
do kk=1,K
CALL do_something(i=DBLE(nn),k=DBLE(kk), ik= A(nn,kk))
end do
!print *, A(nn, :)
end do
!$omp end parallel do
end subroutine fill_matrix
subroutine do_something(i,k, ik)
double precision, intent(in) :: i, k
double precision, intent(out) :: ik
ik = 1 / (1 + exp(i**2 + i**3 + k**2 + k**3))
end subroutine do_something
end module im_f_module
This is the R function that calls that subroutine:
#'@export
#'@useDynLib fortranMatrix, .registration = TRUE
fill_my_matrix <- function(N=10, K=5, nthreads=4) {
(OpenMPController::omp_set_num_threads(nthreads))
A <- .Fortran("fill",
N = as.integer(N),
K = as.integer(K),
A = matrix(1982,nrow = N, ncol = K),
nthreads = as.integer(nthreads))
return(A$A)
}
and finally, this is the Makevars
so things compile the right way
##### Compiler flags #####
PKG_FCFLAGS = $(SHLIB_OPENMP_FFLAGS)
PKG_LIBS = $(SHLIB_OPENMP_CFLAGS)
##### Phony target for R's build system to invoke #####
all: $(SHLIB)
##### Clean target #####
clean:
rm -f *.o *.mod
Calling the Fortran code
library(fortranMatrix)
tic('Fortran approach')
library(fortranMatrix)
B <- fill_my_matrix(N = N, K = K, nthreads = 4)
toc()
## Fortran approach: 0.016 sec elapsed
All equal
all.equal(A, B, tolerance = .Machine$double.eps)
## [1] TRUE
Microbenchmark
library(microbenchmark)
mb <- microbenchmark(R1 = fill_matrix_r(N = N, K = K, workers = 4),
F1 = fill_my_matrix(N = N, K = K, nthreads = 4), times = 20L)
ggplot2::autoplot(mb)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.