# tensor_predictors/sim/ising.R

library(tensorPredictors)
library(mvbernoulli)
set.seed(161803399, "Mersenne-Twister", "Inversion", "Rejection")
### simulation configuration
file.prefix <- "sim-ising"
reps <- 100 # number of simulation replications
max.iter <- 100 # maximum number of iterations for GMLM
sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n`
N <- 2000 # validation set size
p <- c(4, 4) # predictor dimensions (ONLY 4 by 4 allowed!)
q <- c(2, 2) # response dimensions (ONLY 2 by 2 allowed!)
r <- length(p)
# parameter configuration
rho <- -0.55
c1 <- 1
c2 <- 1
# initial consistency checks
stopifnot(exprs = {
    r == 2
    all.equal(p, c(4, 4))
    all.equal(q, c(2, 2))
})
### small helpers
# 270 deg matrix layout rotation (90 deg clockwise)
rot270 <- function(A) t(A)[, rev(seq_len(nrow(A))), drop = FALSE]
# Auto-Regression Covariance Matrix
AR <- function(rho, dim) rho^abs(outer(seq_len(dim), seq_len(dim), `-`))
# Inverse of the AR matrix
AR.inv <- function(rho, dim) {
    A <- diag(c(1, rep(rho^2 + 1, dim - 2), 1))
    A[abs(.row(dim(A)) - .col(dim(A))) == 1] <- -rho
    A / (1 - rho^2)
}
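# Quick sanity check (illustrative, not part of the original simulation):
# `AR.inv` should reproduce `solve(AR(...))` up to numerical precision, e.g.
#   stopifnot(all.equal(AR(rho, 4) %*% AR.inv(rho, 4), diag(4)))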
# projection matrix `P_A` as a projection onto the span of `A`
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
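# Illustrative check (not in the original script): `proj(A)` is the orthogonal
# projection onto span(A), hence idempotent and fixing the columns of `A`, e.g.
#   A <- matrix(rnorm(12), 4, 3)
#   stopifnot(all.equal(proj(A) %*% A, A), all.equal(proj(A) %*% proj(A), proj(A)))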
### setup Ising parameters (to get reasonable parameters)
eta1 <- 0
alphas <- Map(function(pj, qj) { # `qj` ignored, it's always 2
    linspace <- seq(-1, 1, length.out = pj)
    matrix(c(linspace, linspace^2), pj, 2)
}, p, q)
Omegas <- Map(AR, dim = p, MoreArgs = list(rho))
# data sampling routine
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
    # generate response (sample axis is the last axis)
    y <- runif(n, -1, 1) # Y ~ U[-1, 1]
    Fy <- rbind(cos(pi * y), sin(pi * y), -sin(pi * y), cos(pi * y))
    dim(Fy) <- c(2, 2, n)
    # natural exponential family parameters
    eta_y1 <- c1 * (mlm(Fy, alphas) + c(eta1))
    eta_y2 <- c2 * Reduce(`%x%`, rev(Omegas))
    # conditional Ising model parameters: one half-vectorized (vech) parameter
    # vector per observation, interactions taken from `eta_y2` with the
    # diagonal (main effects) replaced by `eta_y1`
    theta_y <- matrix(rep(vech(eta_y2), n), ncol = n)
    ltri <- which(lower.tri(eta_y2, diag = TRUE))
    diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri])
    theta_y[diagonal, ] <- eta_y1
    # sample X from the conditional distribution
    X <- apply(theta_y, 2, ising_sample, n = 1)
    # convert (from compressed integer vector) to array data
    attr(X, "p") <- prod(p)
    X <- t(as.mvbmatrix(X))
    dim(X) <- c(p, n)
    storage.mode(X) <- "double"
    # permute axes to move the sample axis to the requested position
    if (sample.axis != r + 1L) {
        perm <- integer(r + 1L)
        perm[sample.axis] <- r + 1L
        perm[-sample.axis] <- seq_len(r)
        X <- aperm(X, perm)
        Fy <- aperm(Fy, perm)
    }
    list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
}
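# Usage sketch (illustrative only): with the settings above, a draw of size 10
# returns `X` with dimension c(4, 4, 10) and `Fy` with dimension c(2, 2, 10):
#   smpl <- sample.data(10, eta1, alphas, Omegas)
#   str(smpl$X)  # num [1:4, 1:4, 1:10] ...
#   str(smpl$Fy) # num [1:2, 1:2, 1:10] ...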
### Logging Errors and Warnings
# Register a global warning and error handler that logs warnings/errors
# together with the current simulation repetition's session information,
# allowing problematic repetitions to be reproduced.
exceptionLogger <- function(ex) {
    # retrieve current simulation repetition information
    rep.info <- get("rep.info", envir = .GlobalEnv)
    # setup an error log file with the same name as `file`
    log <- paste0(rep.info$file, ".log")
    # write (append) the condition message with reproduction info to the log
    cat("\n\n------------------------------------------------------------\n",
        sprintf("file <- \"%s\"\nn <- %d\nrep <- %d\n.Random.seed <- c(%s)\n%s\nTraceback:\n",
            rep.info$file, rep.info$n, rep.info$rep,
            paste(rep.info$.Random.seed, collapse = ","),
            as.character.error(ex)
        ), sep = "", file = log, append = TRUE)
    # add traceback (see `traceback()`, from which the following is adapted)
    n <- length(x <- .traceback(NULL, max.lines = -1L))
    if (n == 0L) {
        cat("No traceback available", "\n", file = log, append = TRUE)
    } else {
        for (i in 1L:n) {
            xi <- x[[i]]
            label <- paste0(n - i + 1L, ": ")
            m <- length(xi)
            srcloc <- if (!is.null(srcref <- attr(xi, "srcref"))) {
                srcfile <- attr(srcref, "srcfile")
                paste0(" at ", basename(srcfile$filename), "#", srcref[1L])
            }
            if (isTRUE(attr(xi, "truncated"))) {
                xi <- c(xi, " ...")
                m <- length(xi)
            }
            if (!is.null(srcloc)) {
                xi[m] <- paste0(xi[m], srcloc)
            }
            if (m > 1) {
                # pad continuation lines to the width of the frame label
                label <- c(label, rep(substr("          ", 1L,
                    nchar(label, type = "w")), m - 1L))
            }
            cat(paste0(label, xi), sep = "\n", file = log, append = TRUE)
        }
    }
}
globalCallingHandlers(list(
    message = exceptionLogger, warning = exceptionLogger, error = exceptionLogger
))
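# Reproduction sketch (illustrative, values to be copied from a "<file>.log"
# entry written by `exceptionLogger`): restoring the logged RNG state in the
# global environment and re-running the sampling step below replays the draw
# that triggered the condition, e.g.
#   .Random.seed <- c(...)  # seed vector copied verbatim from the log
#   c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)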
### for every sample size
start <- format(Sys.time(), "%Y%m%dT%H%M")
for (n in sample.sizes) {
    ### write new simulation result file
    file <- paste0(paste(file.prefix, start, n, sep = "-"), ".csv")
    # CSV header, used to ensure correct value/column mapping when writing to file
    header <- outer(
        c("dist.subspace", "dist.projection", "error.pred"), # measures
        c("gmlm", "pca", "hopca", "tsir"), # methods
        paste, sep = ".")
    cat(paste0(header, collapse = ","), "\n", sep = "", file = file)
    ### repeated simulation
    for (rep in seq_len(reps)) {
        ### Repetition session state info
        # Stores specific session variables before starting the current
        # simulation replication. This allows logging state information which
        # can be used to replicate a specific simulation repetition in case of
        # errors/warnings reported in the logs.
        rep.info <- list(n = n, rep = rep, file = file, .Random.seed = .Random.seed)
        ### sample (training) data
        c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
        ### Fit data using different methods
        fit.gmlm <- GMLM.default(X, Fy, sample.axis = sample.axis,
            max.iter = max.iter, family = "ising")
        fit.hopca <- HOPCA(X, npc = q, sample.axis = sample.axis)
        fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(q))
        fit.tsir <- NA # TSIR(X, y, q, sample.axis = sample.axis)
        ### Compute reductions `B.*` where each `B.*` spans a reduction subspace
        B.true <- Reduce(`%x%`, rev(alphas))
        B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(alphas)))
        B.hopca <- Reduce(`%x%`, rev(fit.hopca))
        B.pca <- fit.pca$rotation
        B.tsir <- NA # Reduce(`%x%`, rev(fit.tsir))
        # Subspace Distances: Normalized `|| P_A - P_B ||_F` where
        # `P_A = A (A' A)^-1 A'`; the normalization ensures that, with respect
        # to the dimensions of `A, B`, the subspace distance lies in `[0, 1]`.
        dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
        dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
        dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE)
        dist.subspace.tsir <- NA # dist.subspace(B.true, B.tsir, normalize = TRUE)
        # Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`.
        dist.projection.gmlm <- dist.projection(B.true, B.gmlm)
        dist.projection.hopca <- dist.projection(B.true, B.hopca)
        dist.projection.pca <- dist.projection(B.true, B.pca)
        dist.projection.tsir <- NA # dist.projection(B.true, B.tsir)
        ### Prediction Errors (using a new independent sample of size `N`)
        c(X, Fy, y, sample.axis) %<-% sample.data(N, eta1, alphas, Omegas)
        # centered model matrix of vectorized `X`s
        vecX <- scale(mat(X, sample.axis), center = TRUE, scale = FALSE)
        P.true <- proj(B.true)
        error.pred.gmlm <- norm(P.true - proj(B.gmlm), "2")
        error.pred.hopca <- norm(P.true - proj(B.hopca), "2")
        error.pred.pca <- norm(P.true - proj(B.pca), "2")
        error.pred.tsir <- NA # norm(P.true - proj(B.tsir), "2")
        # format estimation/prediction errors and write to file and console
        line <- paste0(Map(get, header), collapse = ",")
        cat(line, "\n", sep = "", file = file, append = TRUE)
        # report progress
        cat(sprintf("sample size: %d/%d - rep: %d/%d\n",
            which(n == sample.sizes), length(sample.sizes), rep, reps))
    }
}
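# Aggregation sketch (illustrative, assuming the file naming pattern above):
# the per-sample-size CSV files can be combined for later analysis, e.g.
#   sim <- do.call(rbind, Map(function(n) {
#       cbind(n = n, read.csv(paste0(paste(file.prefix, start, n, sep = "-"), ".csv")))
#   }, sample.sizes))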