7.6 KiB
Build and install.
To build the package the devtools
package is used. This also provides roxygen2
which is used for documentation and authomatic creaton of the NAMESPACE
file.
setwd("./CVE_R") # Set path to the package root.
library(devtools) # Load required `devtools` package.
document() # Create `.Rd` files and write `NAMESPACE`.
Next the package needs to be build, therefore (if pure R
package, aka. C/C++
, Fortran
, ... code) just do the following.
R CMD build CVE_R
R CMD INSTALL CVE_0.1.tar.gz
Then we are ready for using the package.
library(CVE)
help(package = "CVE")
Build and install from within R
.
An alternative approach is the following.
setwd('./CVE_R')
getwd()
library(devtools)
document()
# No vignettes to build but "inst/doc/" is required!
(path <- build(vignettes = FALSE))
install.packages(path, repos = NULL, type = "source")
Note: I only recommend this approach during development.
Reading log files.
The runtime tests (upcomming further tests) are creating log files saved in tmp/
. These log files are CSV
files (actualy TSV
) with a header storing the test results. Depending on the test the files may contain differnt data. As an example we use the runtime test logs which store in each line the dataset
, the used method
as well as the error
(actual error of estimated B
against real B
) and the time
. For reading and analysing the data see the following example.
# Load log as `data.frame`
log <- read.csv('tmp/test0.log', sep = '\t')
# Create a error boxplot grouped by dataset.
boxplot(error ~ dataset, log)
# Overview
for (ds.name in paste0('M', seq(5))) {
ds <- subset(log, dataset == ds.name, select = c('method', 'dataset', 'time', 'error'))
print(summary(ds))
}
Environments and variable lookup.
In the following a view simple examples of how R
searches for variables.
In addition we manipulate funciton closures to alter the search path in variable lookup and outer scope variable manipulation.
droids <- "These aren't the droids you're looking for."
search <- function() {
print(droids)
}
trooper.seeks <- function() {
droids <- c("R2-D2", "C-3PO")
search()
}
jedi.seeks <- function() {
droids <- c("R2-D2", "C-3PO")
environment(search) <- environment()
search()
}
trooper.seeks()
jedi.seeks()
The next example ilustrates how to write (without local copies) to variables outside the functions local environment.
counting <- function() {
count <<- count + 1 # Note the `<<-` assignment.
}
(function() {
environment(counting) <- environment()
count <- 0
for (i in 1:10) {
counting()
}
return(count)
})()
(function () {
closure <- new.env()
environment(counting) <- closure
assign("count", 0, envir = closure)
for (i in 1:10) {
counting()
}
return(closure$count)
})()
Another example for the usage of do.call
where the evaluation of parameters is illustated (example taken (and altered) from ?do.call
).
## examples of where objects will be found.
A <- "A.Global"
f <- function(x) print(paste("f.new", x))
env <- new.env()
assign("A", "A.new", envir = env)
assign("f", f, envir = env)
f <- function(x) print(paste("f.Global", x))
f(A) # f.Global A.Global
do.call("f", list(A)) # f.Global A.Global
do.call("f", list(A), envir = env) # f.new A.Global
do.call(f, list(A), envir = env) # f.Global A.Global
do.call("f", list(quote(A)), envir = env) # f.new A.new
do.call(f, list(quote(A)), envir = env) # f.Global A.new
do.call("f", list(as.name("A")), envir = env) # f.new A.new
do.call("f", list(as.name("A")), envir = env) # f.new A.new
Performance benchmarks
In this section alternative implementations of simple algorithms are compared for there performance.
Computing the trace of a matrix multiplication.
library(microbenchmark)
A <- matrix(runif(120), 12, 10)
# Matrix trace.
tr <- function(M) sum(diag(M))
# Check correctnes and benckmark performance.
stopifnot(
all.equal(
tr(t(A) %*% A),
sum(diag(t(A) %*% A)),
sum(A * A)
)
)
microbenchmark(
tr(t(A) %*% A),
sum(diag(t(A) %*% A)),
sum(A * A)
)
# Unit: nanoseconds
# expr min lq mean median uq max neval
# tr(t(A) %*% A) 4335 4713 5076.36 4949.5 5402.5 7928 100
# sum(diag(t(A) %*% A)) 4106 4429 5233.89 4733.5 5057.5 49308 100
# sum(A * A) 540 681 777.07 740.0 818.5 3572 100
n <- 200
M <- matrix(runif(n^2), n, n)
dnorm2 <- function(x) exp(-0.5 * x^2) / sqrt(2 * pi)
stopifnot(
all.equal(dnorm(M), dnorm2(M))
)
microbenchmark(
dnorm = dnorm(M),
dnorm2 = dnorm2(M),
exp = exp(-0.5 * M^2) # without scaling -> irrelevant for usage
)
# Unit: microseconds
# expr min lq mean median uq max neval
# dnorm 841.503 843.811 920.7828 855.7505 912.4720 2405.587 100
# dnorm2 543.510 580.319 629.5321 597.8540 607.3795 2603.763 100
# exp 502.083 535.943 577.2884 548.3745 561.3280 2113.220 100
Using crosspord()
p <- 12
q <- 10
V <- matrix(runif(p * q), p, q)
stopifnot(
all.equal(V %*% t(V), tcrossprod(V)),
all.equal(V %*% t(V), tcrossprod(V, V))
)
microbenchmark(
V %*% t(V),
tcrossprod(V),
tcrossprod(V, V)
)
# Unit: microseconds
# expr min lq mean median uq max neval
# V %*% t(V) 2.293 2.6335 2.94673 2.7375 2.9060 19.592 100
# tcrossprod(V) 1.148 1.2475 1.86173 1.3440 1.4650 30.688 100
# tcrossprod(V, V) 1.003 1.1575 1.28451 1.2400 1.3685 2.742 100
Recycling vs. Sweep
(n <- 200)
(p <- 12)
(q <- 10)
X_diff <- matrix(runif(n * (n - 1) / 2 * p), n * (n - 1) / 2, p)
V <- matrix(rnorm(p * q), p, q)
vecS <- runif(n * (n - 1) / 2)
stopifnot(
all.equal((X_diff %*% V) * rep(vecS, q),
sweep(X_diff %*% V, 1, vecS, `*`)),
all.equal((X_diff %*% V) * rep(vecS, q),
(X_diff %*% V) * vecS)
)
microbenchmark(
rep = (X_diff %*% V) * rep(vecS, q),
sweep = sweep(X_diff %*% V, 1, vecS, `*`, check.margin = FALSE),
recycle = (X_diff %*% V) * vecS
)
# Unit: microseconds
# expr min lq mean median uq max neval
# rep 851.723 988.3655 1575.639 1203.6385 1440.578 18999.23 100
# sweep 1313.177 1522.4010 2355.269 1879.2605 2065.399 18783.24 100
# recycle 719.001 786.1265 1157.285 881.8825 1163.202 19091.79 100
Scaled crossprod
with matmul order.
(n <- 200)
(p <- 12)
(q <- 10)
X_diff <- matrix(runif(n * (n - 1) / 2 * p), n * (n - 1) / 2, p)
V <- matrix(rnorm(p * q), p, q)
vecS <- runif(n * (n - 1) / 2)
ref <- crossprod(X_diff, X_diff * vecS) %*% V
stopifnot(
all.equal(ref, crossprod(X_diff, (X_diff %*% V) * vecS)),
all.equal(ref, crossprod(X_diff, (X_diff %*% V) * vecS))
)
microbenchmark(
inner = crossprod(X_diff, X_diff * vecS) %*% V,
outer = crossprod(X_diff, (X_diff %*% V) * vecS)
)
# Unit: microseconds
# expr min lq mean median uq max neval
# inner 789.065 867.939 1683.812 987.9375 1290.055 16800.265 100
# outer 1141.479 1216.929 1404.702 1317.7315 1582.800 2531.766 100
Using Rprof()
for performance.
The standart method for profiling where an algorithm is spending its time is with Rprof()
.
path <- '../tmp/R.prof' # path to profiling file
Rprof(path)
cve.res <- cve.call(X, Y, k = k)
Rprof(NULL)
(prof <- summaryRprof(path)) # Summarise results
Note: considure to run gc()
before measuring, aka cleaning up by explicitely calling the garbage collector.