Cloud computing with R and AWS
Page content
Why?
Credit
I took most of the code from this gist
The code
This function takes a list of your instances and the path to your private key, and returns a cluster object that can be used with the future package. I was told that this function will be part of a new package soon.
#' Build a PSOCK cluster on a set of EC2 instances
#'
#' Polls every instance until its state is "running" and its status check is
#' "ok", looks up the public IP of each instance, and then connects to all of
#' them over SSH with `makeClusterPSOCK()`.
#'
#' @param instances A non-empty list of instances; each element is passed to
#'   `instance_status()` and `describe_instances()`.
#' @param key Path to the private SSH key file (handed to ssh via `-i`).
#' @param poll_interval Number of seconds to wait between polling rounds.
#' @param timeout Maximum number of seconds to wait for all instances to be
#'   initialized. The default, `Inf`, waits indefinitely.
#' @return The cluster object returned by `makeClusterPSOCK()`.
aws_cluster <- function(instances, key, poll_interval = 5, timeout = Inf) {
  stopifnot(
    length(instances) > 0L,
    is.character(key), length(key) == 1L, !is.na(key),
    is.numeric(poll_interval), length(poll_interval) == 1L,
    poll_interval >= 0,
    is.numeric(timeout), length(timeout) == 1L, timeout > 0
  )
  # Fail before waiting on AWS rather than at SSH connection time
  if (!file.exists(key)) {
    stop("Private key file not found: ", key, call. = FALSE)
  }

  # TRUE only when `field` holds exactly one value equal to `expected`.
  # A missing (NULL) field yields FALSE instead of an error inside `if`.
  has_value <- function(field, expected) {
    isTRUE(unlist(field, use.names = FALSE) == expected)
  }

  # Ensure we are running and initialized
  n_instances <- length(instances)
  is_running <- logical(n_instances)
  is_initialized <- logical(n_instances)
  started <- Sys.time()

  repeat {
    for (ii in seq_len(n_instances)) {
      # Nothing left to learn about an instance that is already initialized
      if (is_initialized[ii]) {
        next
      }

      # Current instance
      i_ii <- instance_status(instances[[ii]])

      # Initially, we don't get any information
      if (length(i_ii) == 0L) {
        next
      }

      # First check if we are at least running
      if (!is_running[ii] &&
            has_value(i_ii$item$instanceState$name, "running")) {
        is_running[ii] <- TRUE
        message("Instance ", ii, " is running. Now initializing.")
      }

      # Then check if we are initialized
      if (has_value(i_ii$item$instanceStatus$status, "ok")) {
        is_initialized[ii] <- TRUE
        message("Instance ", ii, " is initialized.")
      }
    }

    if (all(is_initialized)) {
      break
    }

    elapsed <- as.numeric(difftime(Sys.time(), started, units = "secs"))
    if (elapsed > timeout) {
      stop(
        "Timed out after ", timeout, " seconds waiting for instance(s) ",
        paste(which(!is_initialized), collapse = ", "),
        " to be initialized.",
        call. = FALSE
      )
    }

    # Pause between rounds instead of querying the API in a tight loop
    Sys.sleep(poll_interval)
  }

  # Get the public IPs
  public_ip <- vapply(
    instances,
    function(i_ii) {
      i_di <- describe_instances(i_ii)
      i_nic <- i_di[[1]]$instancesSet[[1]]$networkInterfaceSet
      ip <- unlist(
        i_nic$privateIpAddressesSet$association$publicIp,
        use.names = FALSE
      )
      if (length(ip) != 1L || is.na(ip)) {
        stop(
          "Could not determine the public IP of an instance.",
          call. = FALSE
        )
      }
      ip
    },
    FUN.VALUE = character(1)
  )

  # Connect!
  makeClusterPSOCK(
    ## Public IP number of EC2 instance
    public_ip,
    ## User name (always 'ubuntu')
    user = "ubuntu",
    ## Use private SSH key registered with AWS
    rshopts = c(
      "-o", "StrictHostKeyChecking=no",
      "-o", "IdentitiesOnly=yes",
      "-i", key
    ),
    ## Set up .libPaths() for the 'ubuntu' user and
    ## install future/purrr/furrr packages
    rscript_args = c(
      "-e", shQuote("local({p <- Sys.getenv('R_LIBS_USER'); dir.create(p, recursive = TRUE, showWarnings = FALSE); .libPaths(p)})"),
      "-e", shQuote("install.packages(c('future', 'purrr', 'furrr'))")
    ),
    dryrun = FALSE
  )
}
Load the relevant packages
# devtools::install_github("cloudyr/aws.ec2", ref = devtools::github_pull(38))
library(aws.ec2)
library(future)
library(furrr)
library(tictoc)
# Make AWS credentials available to the aws.ec2 calls in this script.
# NOTE(review): with no arguments this presumably reads the default profile
# from the shared credentials file; confirm for your setup.
aws.signature::use_credentials()
Baseline test
# Reference timing: two 60-second sleeps timed under the sequential plan
plan(sequential)
tic(msg = "baseline")
future_map(.x = seq_len(2), .f = function(task) Sys.sleep(60))
toc()
baseline: 121.231 sec elapsed
# Machine image the instances are launched from
image <- "ami-fd2ffe87"

# Look up the subnets, security group and key pair to launch with.
# Check your own VPC and Security Group settings.
s <- describe_subnets()
g <- describe_sgroups("sg-16fa225d")
kp <- describe_keypairs("synology") # <- Your keypair here

# Launch 2 medium instances in the first subnet returned above
i <- run_instances(
  image = image,
  type = "t2.medium",
  min = 2L,
  subnet = s[[1]],
  sgroup = g,
  keypair = kp$synology
)

# Wait for the instances to come up and connect to them over SSH
cl <- aws_cluster(
  instances = i,
  key = "/home/ignacio/AWS/synology.pem"
)
# Now we have a cluster object we can use with future ----
# Same workload as the baseline, this time with `cl` as the workers
plan(cluster, workers = cl)
tic(msg = "test")
future_map(.x = seq_len(2), .f = function(task) Sys.sleep(60))
toc()
test: 61.698 sec elapsed
Finally, you can programmatically shut down the cluster we just created:
# Stop the R workers that belong to the cluster object
parallel::stopCluster(cl)
# Then terminate the EC2 instances that were launched for it
terminate_instances(i)