tensor_predictors/sim/normal.R

172 lines
7.5 KiB
R

library(tensorPredictors)
set.seed(314159265, "Mersenne-Twister", "Inversion", "Rejection")
### simulation configuration
file.prefix <- "sim-normal"
reps <- 100 # number of simulation replications
max.iter <- 10000 # maximum number of iterations for GMLM
sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n`
N <- 2000 # validation set size
p <- c(2, 3, 5) # preditor dimensions
q <- c(1, 2, 3) # functions of y dimensions (response dimensions)
r <- length(p)
# initial consistency checks
stopifnot(exprs = {
r == length(p)
r == length(q)
all(outer(p, sample.sizes, `<`))
})
# projection matrix `P_A` as a projection onto the span of `A`
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
# setup model parameters
alphas <- Map(matrix, Map(rnorm, p * q), p) # reduction matrices
Omegas <- Map(function(pj) 0.5^abs(outer(1:pj, 1:pj, `-`)), p) # mode scatter
eta1 <- 0 # intercept
# data sampling routine
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
# generate response (sample axis is last axis)
y <- sample.int(prod(q), n, replace = TRUE) # uniform samples
Fy <- array(outer(seq_len(prod(q)), y, `==`), dim = c(q, n))
Fy <- Fy - c(rowMeans(Fy, dims = r))
# sample predictors as X | Y = y (sample axis is last axis)
Deltas <- Map(solve, Omegas) # normal covariances
mu_y <- mlm(mlm(Fy, alphas) + c(eta1), Deltas) # conditional mean
X <- mu_y + rtensornorm(n, 0, Deltas, r + 1L) # responses X
# permute axis to requested get the sample axis
if (sample.axis != r + 1L) {
perm <- integer(r + 1L)
perm[sample.axis] <- r + 1L
perm[-sample.axis] <- seq_len(r)
X <- aperm(X, perm)
Fy <- aperm(Fy, perm)
}
list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
}
### Logging Errors and Warnings
# Register a global warning and error handler for logging warnings/errors with
# current simulation repetition session informatin allowing to reproduce problems
exceptionLogger <- function(ex) {
# retrieve current simulation repetition information
rep.info <- get("rep.info", envir = .GlobalEnv)
# setup an error log file with the same name as `file`
log <- paste0(rep.info$file, ".log")
# Write (append) condition message with reproduction info to the log
cat("\n\n------------------------------------------------------------\n",
sprintf("file <- \"%s\"\nn <- %d\nrep <- %d\n.Random.seed <- c(%s)\n%s\nTraceback:\n",
rep.info$file, rep.info$n, rep.info$rep,
paste(rep.info$.Random.seed, collapse = ","),
as.character.error(ex)
), sep = "", file = log, append = TRUE)
# add Traceback (see: `traceback()` which the following is addapted from)
n <- length(x <- .traceback(NULL, max.lines = -1L))
if (n == 0L) {
cat("No traceback available", "\n", file = log, append = TRUE)
} else {
for (i in 1L:n) {
xi <- x[[i]]
label <- paste0(n - i + 1L, ": ")
m <- length(xi)
srcloc <- if (!is.null(srcref <- attr(xi, "srcref"))) {
srcfile <- attr(srcref, "srcfile")
paste0(" at ", basename(srcfile$filename), "#", srcref[1L])
}
if (isTRUE(attr(xi, "truncated"))) {
xi <- c(xi, " ...")
m <- length(xi)
}
if (!is.null(srcloc)) {
xi[m] <- paste0(xi[m], srcloc)
}
if (m > 1) {
label <- c(label, rep(substr(" ", 1L,
nchar(label, type = "w")), m - 1L))
}
cat(paste0(label, xi), sep = "\n", file = log, append = TRUE)
}
}
}
globalCallingHandlers(list(
message = exceptionLogger, warning = exceptionLogger, error = exceptionLogger
))
### for every sample size
start <- format(Sys.time(), "%Y%m%dT%H%M")
for (n in sample.sizes) {
### write new simulation result file
file <- paste0(paste(file.prefix, start, n, sep = "-"), ".csv")
# CSV header, used to ensure correct value/column mapping when writing to file
header <- outer(
c("dist.subspace", "dist.projection", "error.pred"), # measures
c("gmlm", "pca", "hopca", "tsir"), # methods
paste, sep = ".")
cat(paste0(header, collapse = ","), "\n", sep = "", file = file)
### repeated simulation
for (rep in seq_len(reps)) {
### Repetition session state info
# Stores specific session variables before starting the current
# simulation replication. This allows to log state information which
# can be used to replicate a specific simulation repetition in case of
# errors/warnings from the logs
rep.info <- list(n = n, rep = rep, file = file, .Random.seed = .Random.seed)
### sample (training) data
c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
### Fit data using different methods
fit.gmlm <- GMLM.default(X, Fy, sample.axis = sample.axis, max.iter = max.iter)
fit.hopca <- HOPCA(X, npc = q, sample.axis = sample.axis)
fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(q))
fit.tsir <- TSIR(X, y, q, sample.axis = sample.axis)
### Compute Reductions `B.*` where `B.*` spans the reduction subspaces
B.true <- Reduce(`%x%`, rev(alphas))
B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(alphas)))
B.hopca <- Reduce(`%x%`, rev(fit.hopca))
B.pca <- fit.pca$rotation
B.tsir <- Reduce(`%x%`, rev(fit.tsir))
# Subspace Distances: Normalized `|| P_A - P_B ||_F` where
# `P_A = A (A' A)^-1/2 A'` and the normalization means that with
# respect to the dimensions of `A, B` the subspace distance is in the
# range `[0, 1]`.
dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE)
dist.subspace.tsir <- dist.subspace(B.true, B.tsir, normalize = TRUE)
# Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`.
dist.projection.gmlm <- dist.projection(B.true, B.gmlm)
dist.projection.hopca <- dist.projection(B.true, B.hopca)
dist.projection.pca <- dist.projection(B.true, B.pca)
dist.projection.tsir <- dist.projection(B.true, B.tsir)
### Prediction Errors: (using new independend sample of size `N`)
c(X, Fy, y, sample.axis) %<-% sample.data(N, eta1, alphas, Omegas)
# centered model matrix of vectorized `X`s
vecX <- scale(mat(X, sample.axis), center = TRUE, scale = FALSE)
P.true <- proj(B.true)
error.pred.gmlm <- norm(P.true - proj(B.gmlm), "2")
error.pred.hopca <- norm(P.true - proj(B.hopca), "2")
error.pred.pca <- norm(P.true - proj(B.pca), "2")
error.pred.tsir <- norm(P.true - proj(B.tsir), "2")
# format estimation/prediction errors and write to file and console
line <- paste0(Map(get, header), collapse = ",")
cat(line, "\n", sep = "", file = file, append = TRUE)
# report progress
cat(sprintf("sample size: %d/%d - rep: %d/%d\n",
which(n == sample.sizes), length(sample.sizes), rep, reps))
}
}