# tensor_predictors/sim/ising.R

library(tensorPredictors)
library(mvbernoulli)
set.seed(161803399, "Mersenne-Twister", "Inversion", "Rejection")
### simulation configuration
file.prefix <- "sim-ising"
reps <- 100 # number of simulation replications
max.iter <- 100 # maximum number of iterations for GMLM
sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n`
N <- 2000 # validation set size
p <- c(4, 4) # predictor dimensions (ONLY 4 by 4 allowed!)
q <- c(2, 2) # response dimensions (ONLY 2 by 2 allowed!)
r <- length(p)
# parameter configuration
rho <- -0.55
c1 <- 1
c2 <- 1
# initial consistency checks
stopifnot(exprs = {
    r == 2
    all.equal(p, c(4, 4))
    all.equal(q, c(2, 2))
})
### small helpers
# 270 deg matrix layout rotation (90 deg clockwise)
rot270 <- function(A) t(A)[, rev(seq_len(nrow(A))), drop = FALSE]
# Auto-Regression Covariance Matrix
AR <- function(rho, dim) rho^abs(outer(seq_len(dim), seq_len(dim), `-`))
# Inverse of the AR matrix
AR.inv <- function(rho, dim) {
    A <- diag(c(1, rep(rho^2 + 1, dim - 2), 1))
    A[abs(.row(dim(A)) - .col(dim(A))) == 1] <- -rho
    A / (1 - rho^2)
}
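# Quick sanity check (illustrative, not part of the original simulation):
# `AR.inv` should reproduce `solve(AR(...))` up to numerical precision, e.g.
#   stopifnot(all.equal(AR(rho, 4) %*% AR.inv(rho, 4), diag(4)))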
# projection matrix `P_A` as a projection onto the span of `A`
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
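# Illustrative check (not in the original script): `proj(A)` is the orthogonal
# projection onto span(A), hence idempotent and fixing the columns of `A`, e.g.
#   A <- matrix(rnorm(12), 4, 3)
#   stopifnot(all.equal(proj(A) %*% A, A), all.equal(proj(A) %*% proj(A), proj(A)))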
### setup Ising parameters (to get reasonable parameters)
eta1 <- 0
alphas <- Map(function(pj, qj) { # `qj` ignored, it's always 2
    linspace <- seq(-1, 1, length.out = pj)
    matrix(c(linspace, linspace^2), pj, 2)
}, p, q)
Omegas <- Map(AR, dim = p, MoreArgs = list(rho))
# data sampling routine
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
    # generate response (sample axis is the last axis)
    y <- runif(n, -1, 1) # Y ~ U[-1, 1]
    Fy <- rbind(cos(pi * y), sin(pi * y), -sin(pi * y), cos(pi * y))
    dim(Fy) <- c(2, 2, n)
    # natural exponential family parameters
    eta_y1 <- c1 * (mlm(Fy, alphas) + c(eta1))
    eta_y2 <- c2 * Reduce(`%x%`, rev(Omegas))
    # conditional Ising model parameters: one half-vectorized (vech) parameter
    # vector per observation, interactions taken from `eta_y2` with the
    # diagonal (main effects) replaced by `eta_y1`
    theta_y <- matrix(rep(vech(eta_y2), n), ncol = n)
    ltri <- which(lower.tri(eta_y2, diag = TRUE))
    diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri])
    theta_y[diagonal, ] <- eta_y1
    # sample X from the conditional distribution
    X <- apply(theta_y, 2, ising_sample, n = 1)
    # convert (from compressed integer vector) to array data
    attr(X, "p") <- prod(p)
    X <- t(as.mvbmatrix(X))
    dim(X) <- c(p, n)
    storage.mode(X) <- "double"
    # permute axes to move the sample axis to the requested position
    if (sample.axis != r + 1L) {
        perm <- integer(r + 1L)
        perm[sample.axis] <- r + 1L
        perm[-sample.axis] <- seq_len(r)
        X <- aperm(X, perm)
        Fy <- aperm(Fy, perm)
    }
    list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
}
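# Usage sketch (illustrative only): with the settings above, a draw of size 10
# returns `X` with dimension c(4, 4, 10) and `Fy` with dimension c(2, 2, 10):
#   smpl <- sample.data(10, eta1, alphas, Omegas)
#   str(smpl$X)  # num [1:4, 1:4, 1:10] ...
#   str(smpl$Fy) # num [1:2, 1:2, 1:10] ...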
### Logging Errors and Warnings
# Register a global warning and error handler that logs warnings/errors
# together with the current simulation repetition's session information,
# allowing problematic repetitions to be reproduced.
exceptionLogger <- function(ex) {
    # retrieve current simulation repetition information
    rep.info <- get("rep.info", envir = .GlobalEnv)
    # setup an error log file with the same name as `file`
    log <- paste0(rep.info$file, ".log")
    # write (append) the condition message with reproduction info to the log
    cat("\n\n------------------------------------------------------------\n",
        sprintf("file <- \"%s\"\nn <- %d\nrep <- %d\n.Random.seed <- c(%s)\n%s\nTraceback:\n",
            rep.info$file, rep.info$n, rep.info$rep,
            paste(rep.info$.Random.seed, collapse = ","),
            as.character.error(ex)
        ), sep = "", file = log, append = TRUE)
    # add traceback (see `traceback()`, from which the following is adapted)
    n <- length(x <- .traceback(NULL, max.lines = -1L))
    if (n == 0L) {
        cat("No traceback available", "\n", file = log, append = TRUE)
    } else {
        for (i in 1L:n) {
            xi <- x[[i]]
            label <- paste0(n - i + 1L, ": ")
            m <- length(xi)
            srcloc <- if (!is.null(srcref <- attr(xi, "srcref"))) {
                srcfile <- attr(srcref, "srcfile")
                paste0(" at ", basename(srcfile$filename), "#", srcref[1L])
            }
            if (isTRUE(attr(xi, "truncated"))) {
                xi <- c(xi, " ...")
                m <- length(xi)
            }
            if (!is.null(srcloc)) {
                xi[m] <- paste0(xi[m], srcloc)
            }
            if (m > 1) {
                # pad continuation lines to the width of the frame label
                label <- c(label, rep(substr("          ", 1L,
                    nchar(label, type = "w")), m - 1L))
            }
            cat(paste0(label, xi), sep = "\n", file = log, append = TRUE)
        }
    }
}
globalCallingHandlers(list(
    message = exceptionLogger, warning = exceptionLogger, error = exceptionLogger
))
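# Reproduction sketch (illustrative, values to be copied from a "<file>.log"
# entry written by `exceptionLogger`): restoring the logged RNG state in the
# global environment and re-running the sampling step below replays the draw
# that triggered the condition, e.g.
#   .Random.seed <- c(...)  # seed vector copied verbatim from the log
#   c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)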
### for every sample size
start <- format(Sys.time(), "%Y%m%dT%H%M")
for (n in sample.sizes) {
    ### write new simulation result file
    file <- paste0(paste(file.prefix, start, n, sep = "-"), ".csv")
    # CSV header, used to ensure correct value/column mapping when writing to file
    header <- outer(
        c("dist.subspace", "dist.projection", "error.pred"), # measures
        c("gmlm", "pca", "hopca", "tsir"), # methods
        paste, sep = ".")
    cat(paste0(header, collapse = ","), "\n", sep = "", file = file)
    ### repeated simulation
    for (rep in seq_len(reps)) {
        ### Repetition session state info
        # Stores specific session variables before starting the current
        # simulation replication. This allows logging state information which
        # can be used to replicate a specific simulation repetition in case of
        # errors/warnings reported in the logs.
        rep.info <- list(n = n, rep = rep, file = file, .Random.seed = .Random.seed)
        ### sample (training) data
        c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
        ### Fit data using different methods
        fit.gmlm <- GMLM.default(X, Fy, sample.axis = sample.axis,
            max.iter = max.iter, family = "ising")
        fit.hopca <- HOPCA(X, npc = q, sample.axis = sample.axis)
        fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(q))
        fit.tsir <- NA # TSIR(X, y, q, sample.axis = sample.axis)
        ### Compute reductions `B.*` where each `B.*` spans a reduction subspace
        B.true <- Reduce(`%x%`, rev(alphas))
        B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(alphas)))
        B.hopca <- Reduce(`%x%`, rev(fit.hopca))
        B.pca <- fit.pca$rotation
        B.tsir <- NA # Reduce(`%x%`, rev(fit.tsir))
        # Subspace Distances: Normalized `|| P_A - P_B ||_F` where
        # `P_A = A (A' A)^-1 A'`; the normalization ensures that, with respect
        # to the dimensions of `A, B`, the subspace distance lies in `[0, 1]`.
        dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
        dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
        dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE)
        dist.subspace.tsir <- NA # dist.subspace(B.true, B.tsir, normalize = TRUE)
        # Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`.
        dist.projection.gmlm <- dist.projection(B.true, B.gmlm)
        dist.projection.hopca <- dist.projection(B.true, B.hopca)
        dist.projection.pca <- dist.projection(B.true, B.pca)
        dist.projection.tsir <- NA # dist.projection(B.true, B.tsir)
        ### Prediction Errors (using a new independent sample of size `N`)
        c(X, Fy, y, sample.axis) %<-% sample.data(N, eta1, alphas, Omegas)
        # centered model matrix of vectorized `X`s
        vecX <- scale(mat(X, sample.axis), center = TRUE, scale = FALSE)
        P.true <- proj(B.true)
        error.pred.gmlm <- norm(P.true - proj(B.gmlm), "2")
        error.pred.hopca <- norm(P.true - proj(B.hopca), "2")
        error.pred.pca <- norm(P.true - proj(B.pca), "2")
        error.pred.tsir <- NA # norm(P.true - proj(B.tsir), "2")
        # format estimation/prediction errors and write to file and console
        line <- paste0(Map(get, header), collapse = ",")
        cat(line, "\n", sep = "", file = file, append = TRUE)
        # report progress
        cat(sprintf("sample size: %d/%d - rep: %d/%d\n",
            which(n == sample.sizes), length(sample.sizes), rep, reps))
    }
}
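# Aggregation sketch (illustrative, assuming the file naming pattern above):
# the per-sample-size CSV files can be combined for later analysis, e.g.
#   sim <- do.call(rbind, Map(function(n) {
#       cbind(n = n, read.csv(paste0(paste(file.prefix, start, n, sep = "-"), ".csv")))
#   }, sample.sizes))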