library(tensorPredictors)

# Fix the RNG kind and seed so that every replication can be reproduced from
# the `.Random.seed` state written to the log file.
set.seed(314159265, "Mersenne-Twister", "Inversion", "Rejection")

### simulation configuration
reps <- 100                                 # number of simulation replications
max.iter <- 10000                           # maximum number of iterations for GMLM
sample.sizes <- c(100, 200, 300, 500, 750)  # sample sizes `n`
N <- 2000                                   # validation set size
p <- c(2, 3, 5)                             # predictor dimensions
q <- c(1, 2, 3)                             # functions of y dimensions (response dimensions)
r <- length(p)                              # number of modes (tensor order)

# initial consistency checks
stopifnot(exprs = {
    r == length(p)
    r == length(q)
    all(outer(p, sample.sizes, `<`))
})

# projection matrix `P_A = A (A' A)^-1 A'` as a projection onto the span of `A`
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))

# setup model parameters
# reduction matrices, the j-th is of dimension `p[j] x q[j]`
alphas <- Map(matrix, Map(rnorm, p * q), p)
# mode scatter matrices with entries `0.5^|i - j|`
Omegas <- Map(function(pj) 0.5^abs(outer(seq_len(pj), seq_len(pj), `-`)), p)
# intercept
eta1 <- 0

# Data sampling routine.
#
# Draws `n` responses `y` uniformly from `1, ..., prod(q)`, encodes them as a
# centered indicator array `Fy` and samples the predictors `X` conditional on
# `y` (the helpers `mlm` and `rtensornorm` are not defined in this file).
#
# @param n number of observations to draw
# @param eta1 intercept added to the conditional mean
# @param alphas list of reduction matrices, one per mode
# @param Omegas list of mode scatter matrices, one per mode
# @param sample.axis index of the axis enumerating the observations in the
#   returned arrays `X` and `Fy` (default is the last axis `r + 1`)
#
# @returns list with elements `X`, `Fy`, `y` and `sample.axis`
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
    # generate response (sample axis is last axis)
    y <- sample.int(prod(q), n, replace = TRUE)     # uniform samples
    # indicator of the sampled level, reshaped to dimensions `c(q, n)`
    Fy <- array(outer(seq_len(prod(q)), y, `==`), dim = c(q, n))
    # center by subtracting the mean over the sample axis
    Fy <- Fy - c(rowMeans(Fy, dims = r))

    # sample predictors as X | Y = y (sample axis is last axis)
    Deltas <- Map(solve, Omegas)                    # normal covariances
    mu_y <- mlm(mlm(Fy, alphas) + c(eta1), Deltas)  # conditional mean
    X <- mu_y + rtensornorm(n, 0, Deltas, r + 1L)   # predictors X

    # permute the axes to move the sample axis to the requested position
    if (sample.axis != r + 1L) {
        perm <- integer(r + 1L)
        perm[sample.axis] <- r + 1L
        perm[-sample.axis] <- seq_len(r)
        X <- aperm(X, perm)
        Fy <- aperm(Fy, perm)
    }

    list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
}

### Logging Errors and Warnings
# Register a global message, warning and error handler for logging conditions
# together with the current simulation repetition session information, which
# allows to reproduce problems.
#
# @param ex the signaled condition object
exceptionLogger <- function(ex) {
    # retrieve current simulation repetition information; a condition signaled
    # before the first replication has nothing to report and must not raise
    # a second error from inside the handler
    rep.info <- get0("rep.info", envir = .GlobalEnv)
    if (is.null(rep.info)) {
        return(invisible(NULL))
    }

    # setup an error log file with the same name as `file`
    log <- paste0(rep.info$file, ".log")

    # Write (append) condition message with reproduction info to the log.
    # `as.character` dispatches on the condition class, such that warnings and
    # messages are not labeled as errors.
    cat("\n\n------------------------------------------------------------\n",
        sprintf("file <- \"%s\"\nn <- %d\nrep <- %d\n.Random.seed <- c(%s)\n%s\nTraceback:\n",
            rep.info$file, rep.info$n, rep.info$rep,
            paste(rep.info$.Random.seed, collapse = ","),
            as.character(ex)
        ), sep = "", file = log, append = TRUE)

    # add Traceback (see: `traceback()` which the following is adapted from)
    n <- length(x <- .traceback(NULL, max.lines = -1L))
    if (n == 0L) {
        cat("No traceback available", "\n", file = log, append = TRUE)
    } else {
        for (i in seq_len(n)) {
            xi <- x[[i]]
            label <- paste0(n - i + 1L, ": ")
            m <- length(xi)
            # source location suffix, `NULL` if no source reference is attached
            srcloc <- if (!is.null(srcref <- attr(xi, "srcref"))) {
                srcfile <- attr(srcref, "srcfile")
                paste0(" at ", basename(srcfile$filename), "#", srcref[1L])
            }
            if (isTRUE(attr(xi, "truncated"))) {
                xi <- c(xi, " ...")
                m <- length(xi)
            }
            if (!is.null(srcloc)) {
                xi[m] <- paste0(xi[m], srcloc)
            }
            if (m > 1) {
                # pad continuation lines to the width of the frame label
                indent <- strrep(" ", nchar(label, type = "w"))
                label <- c(label, rep(indent, m - 1L))
            }
            cat(paste0(label, xi), sep = "\n", file = log, append = TRUE)
        }
    }
}

globalCallingHandlers(list(
    message = exceptionLogger,
    warning = exceptionLogger,
    error = exceptionLogger
))

### for every sample size
start <- format(Sys.time(), "%Y%m%dT%H%M")
for (n in sample.sizes) {
    ### write new simulation result file
    file <- paste0(paste("sim-normal", start, n, sep = "-"), ".csv")
    # CSV header, used to ensure correct value/column mapping when writing to file
    header <- outer(
        c("dist.subspace", "dist.projection", "error.pred"),    # measures
        c("gmlm", "pca", "hopca", "tsir"),                      # methods
        paste, sep = ".")
    cat(paste0(header, collapse = ","), "\n", sep = "", file = file)

    ### repeated simulation
    for (rep in seq_len(reps)) {
        ### Repetition session state info
        # Stores specific session variables before starting the current
        # simulation replication. This allows to log state information which
        # can be used to replicate a specific simulation repetition in case of
        # errors/warnings from the logs
        rep.info <- list(n = n, rep = rep, file = file, .Random.seed = .Random.seed)

        ### sample (training) data
        c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)

        ### Fit data using different methods
        fit.gmlm <- GMLM.default(X, Fy, sample.axis = sample.axis, max.iter = max.iter)
        fit.hopca <- HOPCA(X, npc = q, sample.axis = sample.axis)
        fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(q))
        fit.tsir <- TSIR(X, y, q, sample.axis = sample.axis)

        ### Compute Reductions `B.*` where `B.*` spans the reduction subspaces
        B.true <- Reduce(`%x%`, rev(Map(`%*%`, Omegas, alphas)))
        B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(Map(`%*%`, Omegas, alphas))))
        B.hopca <- Reduce(`%x%`, rev(fit.hopca))
        B.pca <- fit.pca$rotation
        B.tsir <- Reduce(`%x%`, rev(fit.tsir))

        # Subspace Distances: Normalized `|| P_A - P_B ||_F` where
        # `P_A = A (A' A)^-1/2 A'` and the normalization means that with
        # respect to the dimensions of `A, B` the subspace distance is in the
        # range `[0, 1]`.
        # NOTE(review): `proj` in this file uses `(A' A)^-1`, the exponent
        # `-1/2` above is presumably a typo - confirm against `dist.subspace`.
        dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
        dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
        dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE)
        dist.subspace.tsir <- dist.subspace(B.true, B.tsir, normalize = TRUE)

        # Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`.
        dist.projection.gmlm <- dist.projection(B.true, B.gmlm)
        dist.projection.hopca <- dist.projection(B.true, B.hopca)
        dist.projection.pca <- dist.projection(B.true, B.pca)
        dist.projection.tsir <- dist.projection(B.true, B.tsir)

        ### Prediction Errors: (using new independent sample of size `N`)
        # The validation draw is kept as it advances the RNG stream, removing
        # it would change all subsequent replications.
        c(X, Fy, y, sample.axis) %<-% sample.data(N, eta1, alphas, Omegas)
        # centered model matrix of vectorized `X`s
        # NOTE(review): `vecX` is not used by the measures below, which only
        # compare projections and do not involve the validation sample - confirm
        # whether the prediction error was meant to be evaluated on `vecX`.
        vecX <- scale(mat(X, sample.axis), center = TRUE, scale = FALSE)
        P.true <- proj(B.true)
        error.pred.gmlm <- norm(P.true - proj(B.gmlm), "2")
        error.pred.hopca <- norm(P.true - proj(B.hopca), "2")
        error.pred.pca <- norm(P.true - proj(B.pca), "2")
        error.pred.tsir <- norm(P.true - proj(B.tsir), "2")

        # format estimation/prediction errors and write to file and console;
        # the values are looked up by name in the same order as the CSV header
        line <- paste0(Map(get, header), collapse = ",")
        cat(line, "\n", sep = "", file = file, append = TRUE)
        # report progress
        cat(sprintf("sample size: %d/%d - rep: %d/%d\n",
            which(n == sample.sizes), length(sample.sizes), rep, reps))
    }
}