library(tensorPredictors)
library(mvbernoulli)

set.seed(161803399, "Mersenne-Twister", "Inversion", "Rejection")

### simulation configuration
file.prefix <- "sim-ising"
reps <- 100         # number of simulation replications
max.iter <- 100     # maximum number of iterations for GMLM
sample.sizes <- c(100, 200, 300, 500, 750)  # sample sizes `n`
N <- 2000           # validation set size
p <- c(4, 4)        # predictor dimensions (ONLY 4 by 4 allowed!)
q <- c(2, 2)        # response dimensions (ONLY 2 by 2 allowed!)
r <- length(p)

# parameter configuration
rho <- -0.55
c1 <- 1
c2 <- 1

# initial consistency checks
stopifnot(exprs = {
    r == 2
    all.equal(p, c(4, 4))
    all.equal(q, c(2, 2))
})

### small helpers
# 270 degree matrix layout rotation (90 degree clockwise)
rot270 <- function(A) t(A)[, rev(seq_len(nrow(A))), drop = FALSE]
# Auto-Regression Covariance Matrix
AR <- function(rho, dim) rho^abs(outer(seq_len(dim), seq_len(dim), `-`))
# Inverse of the AR matrix
AR.inv <- function(rho, dim) {
    A <- diag(c(1, rep(rho^2 + 1, dim - 2), 1))
    A[abs(.row(dim(A)) - .col(dim(A))) == 1] <- -rho
    A / (1 - rho^2)
}
# projection matrix `P_A` as a projection onto the span of `A`
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
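### (optional) sanity-check sketch for the helpers above -- not part of the
### original simulation. It only verifies, for the parameters defined above,
### that `AR.inv` is the matrix inverse of `AR` and that `proj` yields a
### symmetric, idempotent projection. No randomness is used, so the RNG stream
### of the simulation below is unaffected.
local({
    A <- matrix(c(1, 2, 3, 4, 0, 1, 0, 1), 4, 2)    # arbitrary full-rank basis
    P <- proj(A)
    stopifnot(exprs = {
        all.equal(AR(rho, 4) %*% AR.inv(rho, 4), diag(4))   # inverse check
        all.equal(P, t(P))                                  # symmetry
        all.equal(P, P %*% P)                               # idempotence
    })
})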
### setup Ising parameters (to get reasonable parameters)
eta1 <- 0
alphas <- Map(function(pj, qj) {    # `qj` ignored, it is always 2
    linspace <- seq(-1, 1, length.out = pj)
    matrix(c(linspace, linspace^2), pj, 2)
}, p, q)
Omegas <- Map(AR, dim = p, MoreArgs = list(rho))

# data sampling routine
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
    # generate response (sample axis is last axis)
    y <- runif(n, -1, 1)    # Y ~ U[-1, 1]
    Fy <- rbind(cos(pi * y), sin(pi * y), -sin(pi * y), cos(pi * y))
    dim(Fy) <- c(2, 2, n)

    # natural exponential family parameters
    eta_y1 <- c1 * (mlm(Fy, alphas) + c(eta1))
    eta_y2 <- c2 * Reduce(`%x%`, rev(Omegas))

    # conditional Ising model parameters: per observation the half-vectorized
    # interactions from `eta_y2` with the linear (diagonal) terms from `eta_y1`
    theta_y <- matrix(rep(vech(eta_y2), n), ncol = n)
    ltri <- which(lower.tri(eta_y2, diag = TRUE))
    diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri])
    theta_y[diagonal, ] <- eta_y1

    # sample X from the conditional distribution
    X <- apply(theta_y, 2, ising_sample, n = 1)
    # convert (from compressed integer vector) to array data
    attr(X, "p") <- prod(p)
    X <- t(as.mvbmatrix(X))
    dim(X) <- c(p, n)
    storage.mode(X) <- "double"

    # permute axes to place the sample axis at the requested position
    if (sample.axis != r + 1L) {
        perm <- integer(r + 1L)
        perm[sample.axis] <- r + 1L
        perm[-sample.axis] <- seq_len(r)
        X <- aperm(X, perm)
        Fy <- aperm(Fy, perm)
    }

    list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
}

### Logging Errors and Warnings
# Register a global message/warning/error handler that logs conditions together
# with the current simulation repetition session information, allowing one to
# reproduce problems from the log.
exceptionLogger <- function(ex) {
    # retrieve current simulation repetition information
    rep.info <- get("rep.info", envir = .GlobalEnv)
    # set up an error log file with the same name as `file`
    log <- paste0(rep.info$file, ".log")
    # write (append) condition message with reproduction info to the log
    cat("\n\n------------------------------------------------------------\n",
        sprintf("file <- \"%s\"\nn <- %d\nrep <- %d\n.Random.seed <- c(%s)\n%s\nTraceback:\n",
            rep.info$file, rep.info$n, rep.info$rep,
            paste(rep.info$.Random.seed, collapse = ","),
            as.character.error(ex)
        ), sep = "", file = log, append = TRUE)
    # add traceback (see: `traceback()`, which the following is adapted from)
    n <- length(x <- .traceback(NULL, max.lines = -1L))
    if (n == 0L) {
        cat("No traceback available", "\n", file = log, append = TRUE)
    } else {
        for (i in 1L:n) {
            xi <- x[[i]]
            label <- paste0(n - i + 1L, ": ")
            m <- length(xi)
            srcloc <- if (!is.null(srcref <- attr(xi, "srcref"))) {
                srcfile <- attr(srcref, "srcfile")
                paste0(" at ", basename(srcfile$filename), "#", srcref[1L])
            }
            if (isTRUE(attr(xi, "truncated"))) {
                xi <- c(xi, " ...")
                m <- length(xi)
            }
            if (!is.null(srcloc)) {
                xi[m] <- paste0(xi[m], srcloc)
            }
            if (m > 1) {
                label <- c(label, rep(strrep(" ", nchar(label, type = "w")), m - 1L))
            }
            cat(paste0(label, xi), sep = "\n", file = log, append = TRUE)
        }
    }
}
globalCallingHandlers(list(
    message = exceptionLogger,
    warning = exceptionLogger,
    error = exceptionLogger
))
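### (optional) smoke-test sketch -- kept commented out so that neither the RNG
### stream nor the runtime of the full simulation below is affected. It draws a
### single small training set and runs one GMLM fit, using only calls that also
### appear in the replication loop below.
# c(X, Fy, y, sample.axis) %<-% sample.data(100, eta1, alphas, Omegas)
# str(GMLM.default(X, Fy, sample.axis = sample.axis, max.iter = 10, family = "ising"))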
### for every sample size
start <- format(Sys.time(), "%Y%m%dT%H%M")
for (n in sample.sizes) {
    ### write new simulation result file
    file <- paste0(paste(file.prefix, start, n, sep = "-"), ".csv")
    # CSV header, used to ensure correct value/column mapping when writing to file
    header <- outer(
        c("dist.subspace", "dist.projection", "error.pred"),    # measures
        c("gmlm", "pca", "hopca", "tsir"),                      # methods
        paste, sep = ".")
    cat(paste0(header, collapse = ","), "\n", sep = "", file = file)

    ### repeated simulation
    for (rep in seq_len(reps)) {
        ### Repetition session state info
        # Stores specific session variables before starting the current
        # simulation replication. This allows logging state information which
        # can be used to replicate a specific simulation repetition in case of
        # errors/warnings reported in the logs.
        rep.info <- list(n = n, rep = rep, file = file, .Random.seed = .Random.seed)

        ### sample (training) data
        c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)

        ### Fit data using different methods
        fit.gmlm <- GMLM.default(X, Fy, sample.axis = sample.axis,
            max.iter = max.iter, family = "ising")
        fit.hopca <- HOPCA(X, npc = q, sample.axis = sample.axis)
        fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(q))
        fit.tsir <- NA # TSIR(X, y, q, sample.axis = sample.axis)

        ### Compute reductions `B.*` where `B.*` spans the reduction subspaces
        B.true <- Reduce(`%x%`, rev(alphas))
        B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(alphas)))
        B.hopca <- Reduce(`%x%`, rev(fit.hopca))
        B.pca <- fit.pca$rotation
        B.tsir <- NA # Reduce(`%x%`, rev(fit.tsir))

        # Subspace Distances: normalized `|| P_A - P_B ||_F` where
        # `P_A = A (A' A)^-1 A'`; the normalization ensures that, with respect
        # to the dimensions of `A, B`, the subspace distance lies in `[0, 1]`.
        dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
        dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
        dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE)
        dist.subspace.tsir <- NA # dist.subspace(B.true, B.tsir, normalize = TRUE)

        # Projection Distances: spectral norm (2-norm) `|| P_A - P_B ||_2`.
        dist.projection.gmlm <- dist.projection(B.true, B.gmlm)
        dist.projection.hopca <- dist.projection(B.true, B.hopca)
        dist.projection.pca <- dist.projection(B.true, B.pca)
        dist.projection.tsir <- NA # dist.projection(B.true, B.tsir)

        ### Prediction Errors (using a new independent sample of size `N`)
        c(X, Fy, y, sample.axis) %<-% sample.data(N, eta1, alphas, Omegas)
        # centered model matrix of the vectorized `X`s
        vecX <- scale(mat(X, sample.axis), center = TRUE, scale = FALSE)
        P.true <- proj(B.true)
        error.pred.gmlm <- norm(P.true - proj(B.gmlm), "2")
        error.pred.hopca <- norm(P.true - proj(B.hopca), "2")
        error.pred.pca <- norm(P.true - proj(B.pca), "2")
        error.pred.tsir <- NA # norm(P.true - proj(B.tsir), "2")

        # format estimation/prediction errors and write to file and console
        line <- paste0(Map(get, header), collapse = ",")
        cat(line, "\n", sep = "", file = file, append = TRUE)

        # report progress
        cat(sprintf("sample size: %d/%d - rep: %d/%d\n",
            which(n == sample.sizes), length(sample.sizes), rep, reps))
    }
}
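### (optional) post-processing sketch -- commented out and not part of the
### simulation itself. Assuming the loop above has finished and the per-sample-
### size CSV files are in the working directory, it prints the mean of every
### measure/method column for each sample size (the unimplemented `tsir`
### columns stay `NA`).
# for (n in sample.sizes) {
#     sim <- read.csv(paste0(paste(file.prefix, start, n, sep = "-"), ".csv"))
#     cat(sprintf("\nsample size n = %d:\n", n))
#     print(colMeans(sim, na.rm = TRUE))
# }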