2022-10-06 12:25:40 +00:00
|
|
|
library(tensorPredictors)
|
|
|
|
|
|
|
|
set.seed(314159265, "Mersenne-Twister", "Inversion", "Rejection")
|
|
|
|
|
|
|
|
### simulation configuration
|
2022-10-12 18:28:59 +00:00
|
|
|
file.prefix <- "sim-normal"
|
2022-10-06 12:25:40 +00:00
|
|
|
reps <- 100 # number of simulation replications
|
2022-10-11 17:09:55 +00:00
|
|
|
max.iter <- 10000 # maximum number of iterations for GMLM
|
2022-10-06 12:25:40 +00:00
|
|
|
sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n`
|
|
|
|
N <- 2000 # validation set size
|
|
|
|
p <- c(2, 3, 5) # preditor dimensions
|
|
|
|
q <- c(1, 2, 3) # functions of y dimensions (response dimensions)
|
2022-10-11 17:09:55 +00:00
|
|
|
r <- length(p)
|
2022-10-06 12:25:40 +00:00
|
|
|
|
|
|
|
# initial consistency checks
|
|
|
|
stopifnot(exprs = {
|
2022-10-11 17:09:55 +00:00
|
|
|
r == length(p)
|
|
|
|
r == length(q)
|
2022-10-06 12:25:40 +00:00
|
|
|
all(outer(p, sample.sizes, `<`))
|
|
|
|
})
|
|
|
|
|
2022-10-11 17:09:55 +00:00
|
|
|
# projection matrix `P_A` as a projection onto the span of `A`
|
|
|
|
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
|
|
|
|
|
2022-10-06 12:25:40 +00:00
|
|
|
# setup model parameters
|
|
|
|
alphas <- Map(matrix, Map(rnorm, p * q), p) # reduction matrices
|
|
|
|
Omegas <- Map(function(pj) 0.5^abs(outer(1:pj, 1:pj, `-`)), p) # mode scatter
|
|
|
|
eta1 <- 0 # intercept
|
|
|
|
|
|
|
|
# data sampling routine
|
2022-10-11 17:09:55 +00:00
|
|
|
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
|
2022-10-06 12:25:40 +00:00
|
|
|
# generate response (sample axis is last axis)
|
|
|
|
y <- sample.int(prod(q), n, replace = TRUE) # uniform samples
|
|
|
|
Fy <- array(outer(seq_len(prod(q)), y, `==`), dim = c(q, n))
|
|
|
|
Fy <- Fy - c(rowMeans(Fy, dims = r))
|
|
|
|
|
|
|
|
# sample predictors as X | Y = y (sample axis is last axis)
|
|
|
|
Deltas <- Map(solve, Omegas) # normal covariances
|
|
|
|
mu_y <- mlm(mlm(Fy, alphas) + c(eta1), Deltas) # conditional mean
|
|
|
|
X <- mu_y + rtensornorm(n, 0, Deltas, r + 1L) # responses X
|
|
|
|
|
|
|
|
# permute axis to requested get the sample axis
|
|
|
|
if (sample.axis != r + 1L) {
|
|
|
|
perm <- integer(r + 1L)
|
|
|
|
perm[sample.axis] <- r + 1L
|
|
|
|
perm[-sample.axis] <- seq_len(r)
|
|
|
|
X <- aperm(X, perm)
|
|
|
|
Fy <- aperm(Fy, perm)
|
|
|
|
}
|
|
|
|
|
2022-10-11 17:09:55 +00:00
|
|
|
list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
|
2022-10-06 12:25:40 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
### Logging Errors and Warnings
|
|
|
|
# Register a global warning and error handler for logging warnings/errors with
|
|
|
|
# current simulation repetition session informatin allowing to reproduce problems
|
|
|
|
exceptionLogger <- function(ex) {
|
|
|
|
# retrieve current simulation repetition information
|
|
|
|
rep.info <- get("rep.info", envir = .GlobalEnv)
|
|
|
|
# setup an error log file with the same name as `file`
|
|
|
|
log <- paste0(rep.info$file, ".log")
|
|
|
|
# Write (append) condition message with reproduction info to the log
|
|
|
|
cat("\n\n------------------------------------------------------------\n",
|
|
|
|
sprintf("file <- \"%s\"\nn <- %d\nrep <- %d\n.Random.seed <- c(%s)\n%s\nTraceback:\n",
|
|
|
|
rep.info$file, rep.info$n, rep.info$rep,
|
|
|
|
paste(rep.info$.Random.seed, collapse = ","),
|
|
|
|
as.character.error(ex)
|
|
|
|
), sep = "", file = log, append = TRUE)
|
|
|
|
# add Traceback (see: `traceback()` which the following is addapted from)
|
|
|
|
n <- length(x <- .traceback(NULL, max.lines = -1L))
|
|
|
|
if (n == 0L) {
|
|
|
|
cat("No traceback available", "\n", file = log, append = TRUE)
|
|
|
|
} else {
|
|
|
|
for (i in 1L:n) {
|
|
|
|
xi <- x[[i]]
|
|
|
|
label <- paste0(n - i + 1L, ": ")
|
|
|
|
m <- length(xi)
|
|
|
|
srcloc <- if (!is.null(srcref <- attr(xi, "srcref"))) {
|
|
|
|
srcfile <- attr(srcref, "srcfile")
|
|
|
|
paste0(" at ", basename(srcfile$filename), "#", srcref[1L])
|
|
|
|
}
|
|
|
|
if (isTRUE(attr(xi, "truncated"))) {
|
|
|
|
xi <- c(xi, " ...")
|
|
|
|
m <- length(xi)
|
|
|
|
}
|
|
|
|
if (!is.null(srcloc)) {
|
|
|
|
xi[m] <- paste0(xi[m], srcloc)
|
|
|
|
}
|
|
|
|
if (m > 1) {
|
|
|
|
label <- c(label, rep(substr(" ", 1L,
|
|
|
|
nchar(label, type = "w")), m - 1L))
|
|
|
|
}
|
|
|
|
cat(paste0(label, xi), sep = "\n", file = log, append = TRUE)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
globalCallingHandlers(list(
|
|
|
|
message = exceptionLogger, warning = exceptionLogger, error = exceptionLogger
|
|
|
|
))
|
|
|
|
|
|
|
|
|
|
|
|
### for every sample size
|
|
|
|
start <- format(Sys.time(), "%Y%m%dT%H%M")
|
|
|
|
for (n in sample.sizes) {
|
|
|
|
### write new simulation result file
|
2022-10-12 18:28:59 +00:00
|
|
|
file <- paste0(paste(file.prefix, start, n, sep = "-"), ".csv")
|
2022-10-11 17:09:55 +00:00
|
|
|
# CSV header, used to ensure correct value/column mapping when writing to file
|
|
|
|
header <- outer(
|
|
|
|
c("dist.subspace", "dist.projection", "error.pred"), # measures
|
|
|
|
c("gmlm", "pca", "hopca", "tsir"), # methods
|
|
|
|
paste, sep = ".")
|
2022-10-06 12:25:40 +00:00
|
|
|
cat(paste0(header, collapse = ","), "\n", sep = "", file = file)
|
|
|
|
|
|
|
|
### repeated simulation
|
|
|
|
for (rep in seq_len(reps)) {
|
|
|
|
### Repetition session state info
|
|
|
|
# Stores specific session variables before starting the current
|
|
|
|
# simulation replication. This allows to log state information which
|
|
|
|
# can be used to replicate a specific simulation repetition in case of
|
|
|
|
# errors/warnings from the logs
|
|
|
|
rep.info <- list(n = n, rep = rep, file = file, .Random.seed = .Random.seed)
|
|
|
|
|
|
|
|
### sample (training) data
|
2022-10-11 17:09:55 +00:00
|
|
|
c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
|
2022-10-06 12:25:40 +00:00
|
|
|
|
|
|
|
### Fit data using different methods
|
2022-10-11 17:09:55 +00:00
|
|
|
fit.gmlm <- GMLM.default(X, Fy, sample.axis = sample.axis, max.iter = max.iter)
|
2022-10-06 12:25:40 +00:00
|
|
|
fit.hopca <- HOPCA(X, npc = q, sample.axis = sample.axis)
|
|
|
|
fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(q))
|
2022-10-11 17:09:55 +00:00
|
|
|
fit.tsir <- TSIR(X, y, q, sample.axis = sample.axis)
|
2022-10-06 12:25:40 +00:00
|
|
|
|
|
|
|
### Compute Reductions `B.*` where `B.*` spans the reduction subspaces
|
2022-10-12 18:28:59 +00:00
|
|
|
B.true <- Reduce(`%x%`, rev(alphas))
|
|
|
|
B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(alphas)))
|
2022-10-06 12:25:40 +00:00
|
|
|
B.hopca <- Reduce(`%x%`, rev(fit.hopca))
|
|
|
|
B.pca <- fit.pca$rotation
|
2022-10-11 17:09:55 +00:00
|
|
|
B.tsir <- Reduce(`%x%`, rev(fit.tsir))
|
2022-10-06 12:25:40 +00:00
|
|
|
|
|
|
|
# Subspace Distances: Normalized `|| P_A - P_B ||_F` where
|
|
|
|
# `P_A = A (A' A)^-1/2 A'` and the normalization means that with
|
|
|
|
# respect to the dimensions of `A, B` the subspace distance is in the
|
|
|
|
# range `[0, 1]`.
|
|
|
|
dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
|
|
|
|
dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
|
|
|
|
dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE)
|
2022-10-11 17:09:55 +00:00
|
|
|
dist.subspace.tsir <- dist.subspace(B.true, B.tsir, normalize = TRUE)
|
2022-10-06 12:25:40 +00:00
|
|
|
|
|
|
|
# Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`.
|
|
|
|
dist.projection.gmlm <- dist.projection(B.true, B.gmlm)
|
|
|
|
dist.projection.hopca <- dist.projection(B.true, B.hopca)
|
|
|
|
dist.projection.pca <- dist.projection(B.true, B.pca)
|
2022-10-11 17:09:55 +00:00
|
|
|
dist.projection.tsir <- dist.projection(B.true, B.tsir)
|
2022-10-06 12:25:40 +00:00
|
|
|
|
|
|
|
### Prediction Errors: (using new independend sample of size `N`)
|
2022-10-11 17:09:55 +00:00
|
|
|
c(X, Fy, y, sample.axis) %<-% sample.data(N, eta1, alphas, Omegas)
|
2022-10-06 12:25:40 +00:00
|
|
|
# centered model matrix of vectorized `X`s
|
|
|
|
vecX <- scale(mat(X, sample.axis), center = TRUE, scale = FALSE)
|
|
|
|
P.true <- proj(B.true)
|
|
|
|
error.pred.gmlm <- norm(P.true - proj(B.gmlm), "2")
|
|
|
|
error.pred.hopca <- norm(P.true - proj(B.hopca), "2")
|
|
|
|
error.pred.pca <- norm(P.true - proj(B.pca), "2")
|
2022-10-11 17:09:55 +00:00
|
|
|
error.pred.tsir <- norm(P.true - proj(B.tsir), "2")
|
2022-10-06 12:25:40 +00:00
|
|
|
|
|
|
|
# format estimation/prediction errors and write to file and console
|
|
|
|
line <- paste0(Map(get, header), collapse = ",")
|
|
|
|
cat(line, "\n", sep = "", file = file, append = TRUE)
|
|
|
|
# report progress
|
|
|
|
cat(sprintf("sample size: %d/%d - rep: %d/%d\n",
|
|
|
|
which(n == sample.sizes), length(sample.sizes), rep, reps))
|
|
|
|
}
|
|
|
|
}
|