tensor_predictors/sim/sim_utils.R

206 lines
7.4 KiB
R

#' Some utility function used in simulations
#' Computes the orthogonal projection matrix onto the span of `A`
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
#' Logging Errors and Warnings
#'
#' Register a global warning and error handler for logging warnings/errors with
#' current simulation repetition session informatin allowing to reproduce problems
#'
#' @examples
#' # Usage
#' globalCallingHandlers(list(
#' message = exceptionLogger("warning.log"),
#' warning = exceptionLogger("warning.log"),
#' error = exceptionLogger("error.log")
#' ))
#' # Do some stuff where an error might occure
#' for (rep in 1:1000) {
#' # Store additional information logged with an error when an exception occures
#' storeExceptionInfo(rep = rep)
#' # Do work
#' stopifnot(rep < 17)
#' }
#'
assign(".exception.info", NULL, env = .GlobalEnv)
exceptionLogger <- function(file.name) {
force(file.name)
function(ex) {
log <- file(file.name, open = "a+")
cat("\n### Log At: ", format(Sys.time()), "\n", file = log)
cat("# Exception:\n", file = log)
cat(as.character.error(ex), file = log)
cat("\n# Exception Info:\n", file = log)
dump(".exception.info", envir = .GlobalEnv, file = log)
cat("\n# Traceback:\n", file = log)
# add Traceback (see: `traceback()` which the following is addapted from)
n <- length(x <- .traceback(NULL, max.lines = -1L))
if (n == 0L) {
cat("No traceback available", "\n", file = log)
} else {
for (i in 1L:n) {
xi <- x[[i]]
label <- paste0(n - i + 1L, ": ")
m <- length(xi)
srcloc <- if (!is.null(srcref <- attr(xi, "srcref"))) {
srcfile <- attr(srcref, "srcfile")
paste0(" at ", basename(srcfile$filename), "#", srcref[1L])
}
if (isTRUE(attr(xi, "truncated"))) {
xi <- c(xi, " ...")
m <- length(xi)
}
if (!is.null(srcloc)) {
xi[m] <- paste0(xi[m], srcloc)
}
if (m > 1) {
label <- c(label, rep(substr(" ", 1L,
nchar(label, type = "w")), m - 1L))
}
cat(paste0(label, xi), sep = "\n", file = log)
}
}
close(log)
}
}
#' Used in conjuntion with `exceptionLogger()`
storeExceptionInfo <- function(...) {
info <- list(...)
info$RNGking <- RNGkind()
info$.Random.seed <- get0(".Random.seed", envir = .GlobalEnv)
.GlobalEnv$.exception.info <- info
}
### Simulation logging routine
#' @examples
#' # Create a CSV logger
#' logger <- CSV.logger("test.csv", header = c("A", "B", "C"))
#'
#' # Store some values in current environment
#' A <- 1
#' B <- 2
#' # write values to csv file (order irrelevent)
#' logger(C = -3, B = -2)
#'
#' # another line
#' A <- 10
#' B <- 20
#' logger(B = -20, C = -30)
#'
#' # read the file back in
#' read.csv("test.csv")
#'
#' # In simulations it is often usefull to time stamp the files
#' nr <- 5
#' logger <- CSV.logger(
#' sprintf("test-nr%03d-%s.csv", nr, format(Sys.time(), "%Y%m%dT%H%M")),
#' header = c("A", "B", "C")
#' )
#'
CSV.logger <- function(file.name, header) {
force(file.name)
# CSV header, used to ensure correct value/column mapping when writing to file
force(header)
cat(paste0(header, collapse = ","), "\n", sep = "", file = file.name)
function(...) {
# get directly provided data
arg.data <- list(...)
# all arguments must be given with a name
if (length(arg.data) && is.null(names(arg.data))) {
stop("Arguments must be given with names")
}
# check if all elements have a described CSV header column
unknown <- !(names(arg.data) %in% header)
if (any(unknown)) {
stop("Got unknown columns: ", paste0(names(arg.data)[unknown], collapse = ", "))
}
# get missing values from environment
missing <- !(header %in% names(arg.data))
env <- parent.frame()
data <- c(arg.data, mget(header[missing], envir = env))
# Format all aguments
data <- Map(format, data)
# collaps into single line
line <- paste0(data[header], collapse = ",")
# write data line to file
cat(line, "\n", sep = "", file = file.name, append = TRUE)
}
}
# Set colors for every method
methods <- c("gmlm", "pca", "hopca", "tsir", "mgcca", "lpca", "clpca", "tnormal", "sir")
col.methods <- palette.colors(n = length(methods), palette = "Okabe-Ito", recycle = FALSE)
names(col.methods) <- methods
# Comparison plot of one measure for a simulation
plot.sim <- function(sim, measure.name, ..., ylim = c(0, 1)) {
par.default <- par(pch = 16, bty = "n", lty = "solid", lwd = 1.5)
# # Set colors for every method
# methods <- c("gmlm", "pca", "hopca", "tsir", "mgcca", "lpca", "clpca", "tnormal")
# col.methods <- palette.colors(n = length(methods), palette = "Okabe-Ito", recycle = FALSE)
# names(col.methods) <- methods
# Remain sample size grouping variable to avoid conflicts
aggr.mean <- aggregate(sim, list(sampleSize = sim$sample.size), mean)
aggr.median <- aggregate(sim, list(sampleSize = sim$sample.size), median)
aggr.sd <- aggregate(sim, list(sampleSize = sim$sample.size), sd)
aggr.min <- aggregate(sim, list(sampleSize = sim$sample.size), min)
aggr.max <- aggregate(sim, list(sampleSize = sim$sample.size), max)
with(aggr.mean, {
plot(range(sampleSize), ylim, type = "n", ...)
for (dist.name in ls(pattern = paste0("^", measure.name))) {
mean <- get(dist.name)
median <- aggr.median[aggr.sd$sampleSize == sampleSize, dist.name]
sd <- aggr.sd[aggr.sd$sampleSize == sampleSize, dist.name]
min <- aggr.min[aggr.sd$sampleSize == sampleSize, dist.name]
max <- aggr.max[aggr.sd$sampleSize == sampleSize, dist.name]
method <- tail(strsplit(dist.name, ".", fixed = TRUE)[[1]], 1)
col <- col.methods[method]
lines(sampleSize, mean, type = "o", col = col, lty = 1, lwd = 2 + (method == "gmlm"))
lines(sampleSize, mean + sd, col = col, lty = 2, lwd = 0.8)
lines(sampleSize, mean - sd, col = col, lty = 2, lwd = 0.8)
lines(sampleSize, median, col = col, lty = 1, lwd = 1)
lines(sampleSize, min, col = col, lty = 3, lwd = 0.6)
lines(sampleSize, max, col = col, lty = 3, lwd = 0.6)
}
legend("topright", col = col.methods, lty = 1, legend = names(col.methods),
bty = "n", lwd = par("lwd"), pch = par("pch"))
})
# reset plotting default prameters
par(par.default)
}
timer.env <- new.env()
start.timer <- function() {
assign("start.time", proc.time()[["elapsed"]], envir = timer.env)
}
clear.timer <- function() {
assign("total.time", 0, envir = timer.env)
}
end.timer <- function() {
end.time <- proc.time()[["elapsed"]]
start.time <- get("start.time", envir = timer.env)
total.time <- get0("total.time", envir = timer.env)
if (is.null(total.time)) {
total.time <- 0
}
elapsed <- end.time - start.time
total.time <- total.time + elapsed
assign("total.time", total.time, envir = timer.env)
c(elapsed = elapsed, total.time = total.time)
}