tensor_predictors/sim/sim_utils.R

#' Some utility function used in simulations

#' Computes the orthogonal projection matrix onto the span of `A`
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))


#' Logging Errors and Warnings
#'
#' Register a global warning and error handler for logging warnings/errors with
#' current simulation repetition session informatin allowing to reproduce problems
#'
#' @examples
#' # Usage
#' globalCallingHandlers(list(
#'     message = exceptionLogger("warning.log"),
#'     warning = exceptionLogger("warning.log"),
#'     error = exceptionLogger("error.log")
#' ))
#' # Do some stuff where an error might occure
#' for (rep in 1:1000) {
#'     # Store additional information logged with an error when an exception occures
#'     storeExceptionInfo(rep = rep)
#'     # Do work
#'     stopifnot(rep < 17)
#' }
#'
assign(".exception.info", NULL, env = .GlobalEnv)
exceptionLogger <- function(file.name) {
    force(file.name)
    function(ex) {
        log <- file(file.name, open = "a+")
        cat("\n### Log At: ", format(Sys.time()), "\n", file = log)
        cat("# Exception:\n", file = log)
        cat(as.character.error(ex), file = log)
        cat("\n# Exception Info:\n", file = log)
        dump(".exception.info", envir = .GlobalEnv, file = log)
        cat("\n# Traceback:\n", file = log)

        # add Traceback (see: `traceback()` which the following is addapted from)
        n <- length(x <- .traceback(NULL, max.lines = -1L))
        if (n == 0L) {
            cat("No traceback available", "\n", file = log)
        } else {
            for (i in 1L:n) {
                xi <- x[[i]]
                label <- paste0(n - i + 1L, ": ")
                m <- length(xi)
                srcloc <- if (!is.null(srcref <- attr(xi, "srcref"))) {
                    srcfile <- attr(srcref, "srcfile")
                    paste0(" at ", basename(srcfile$filename), "#", srcref[1L])
                }
                if (isTRUE(attr(xi, "truncated"))) {
                    xi <- c(xi, " ...")
                    m <- length(xi)
                }
                if (!is.null(srcloc)) {
                    xi[m] <- paste0(xi[m], srcloc)
                }
                if (m > 1) {
                    label <- c(label, rep(substr("          ", 1L,
                    nchar(label, type = "w")), m - 1L))
                }
                cat(paste0(label, xi), sep = "\n", file = log)
            }
        }
        close(log)
    }
}

#' Used in conjuntion with `exceptionLogger()`
storeExceptionInfo <- function(...) {
    info <- list(...)
    info$RNGking <- RNGkind()
    info$.Random.seed <- get0(".Random.seed", envir = .GlobalEnv)
    .GlobalEnv$.exception.info <- info
}


### Simulation logging routine
#' @examples
#' # Create a CSV logger
#' logger <- CSV.logger("test.csv", header = c("A", "B", "C"))
#'
#' # Store some values in current environment
#' A <- 1
#' B <- 2
#' # write values to csv file (order irrelevent)
#' logger(C = -3, B = -2)
#'
#' # another line
#' A <- 10
#' B <- 20
#' logger(B = -20, C = -30)
#'
#' # read the file back in
#' read.csv("test.csv")
#'
#' # In simulations it is often usefull to time stamp the files
#' nr <- 5
#' logger <- CSV.logger(
#'     sprintf("test-nr%03d-%s.csv", nr, format(Sys.time(), "%Y%m%dT%H%M")),
#'     header = c("A", "B", "C")
#' )
#'
CSV.logger <- function(file.name, header) {
    force(file.name)

    # CSV header, used to ensure correct value/column mapping when writing to file
    force(header)
    cat(paste0(header, collapse = ","), "\n", sep = "", file = file.name)

    function(...) {
        # get directly provided data
        arg.data <- list(...)
        # all arguments must be given with a name
        if (length(arg.data) && is.null(names(arg.data))) {
            stop("Arguments must be given with names")
        }
        # check if all elements have a described CSV header column
        unknown <- !(names(arg.data) %in% header)
        if (any(unknown)) {
            stop("Got unknown columns: ", paste0(names(arg.data)[unknown], collapse = ", "))
        }
        # get missing values from environment
        missing <- !(header %in% names(arg.data))
        env <- parent.frame()
        data <- c(arg.data, mget(header[missing], envir = env))
        # Format all aguments
        data <- Map(format, data)
        # collaps into single line
        line <- paste0(data[header], collapse = ",")
        # write data line to file
        cat(line, "\n", sep = "", file = file.name, append = TRUE)
    }
}


# Set colors for every method
methods <- c("gmlm", "pca", "hopca", "tsir", "mgcca", "lpca", "clpca", "tnormal", "sir")
col.methods <- palette.colors(n = length(methods), palette = "Okabe-Ito", recycle = FALSE)
names(col.methods) <- methods


# Comparison plot of one measure for a simulation
plot.sim <- function(sim, measure.name, ..., ylim = c(0, 1), cols = col.methods) {
    par.default <- par(pch = 16, bty = "n", lty = "solid", lwd = 1.5)

    # extract method names from color specification
    methods <- names(cols)

    # Remain sample size grouping variable to avoid conflicts
    aggr.mean   <- aggregate(sim, list(sampleSize = sim$sample.size), mean)
    aggr.median <- aggregate(sim, list(sampleSize = sim$sample.size), median)
    aggr.sd     <- aggregate(sim, list(sampleSize = sim$sample.size), sd)
    aggr.min    <- aggregate(sim, list(sampleSize = sim$sample.size), min)
    aggr.max    <- aggregate(sim, list(sampleSize = sim$sample.size), max)

    with(aggr.mean, {
        plot(range(sampleSize), ylim, type = "n", ...)
        for (dist.name in ls(pattern = paste0("^", measure.name))) {
            mean <- get(dist.name)
            median <- aggr.median[aggr.sd$sampleSize == sampleSize, dist.name]
            sd     <-     aggr.sd[aggr.sd$sampleSize == sampleSize, dist.name]
            min    <-    aggr.min[aggr.sd$sampleSize == sampleSize, dist.name]
            max    <-    aggr.max[aggr.sd$sampleSize == sampleSize, dist.name]
            method <- tail(strsplit(dist.name, ".", fixed = TRUE)[[1]], 1)
            col <- cols[method]
            lines(sampleSize, mean, type = "o", col = col, lty = 1, lwd = 2 + (method == "gmlm"))
            lines(sampleSize, mean + sd,        col = col, lty = 2, lwd = 0.8)
            lines(sampleSize, mean - sd,        col = col, lty = 2, lwd = 0.8)
            lines(sampleSize, median,           col = col, lty = 1, lwd = 1)
            lines(sampleSize, min,              col = col, lty = 3, lwd = 0.6)
            lines(sampleSize, max,              col = col, lty = 3, lwd = 0.6)
        }

        legend("topright", col = cols, lty = 1, legend = names(cols),
            bty = "n", lwd = par("lwd"), pch = par("pch"))
    })

    # reset plotting default prameters
    par(par.default)
}


timer.env <- new.env()
start.timer <- function() {
    assign("start.time", proc.time()[["elapsed"]], envir = timer.env)
}
clear.timer <- function() {
    assign("total.time", 0, envir = timer.env)
}
end.timer <- function() {
    end.time <- proc.time()[["elapsed"]]
    start.time <- get("start.time", envir = timer.env)
    total.time <- get0("total.time", envir = timer.env)
    if (is.null(total.time)) {
        total.time <- 0
    }
    elapsed <- end.time - start.time
    total.time <- total.time + elapsed
    assign("total.time", total.time, envir = timer.env)
    c(elapsed = elapsed, total.time = total.time)
}