library(tensorPredictors) # Set PRNG seed to the first 4 digits of the golden ratio for reproducability set.seed(1618L, "Mersenne-Twister", "Inversion", "Rejection") ### Simulation configuration reps <- 100 # number of simulation replications sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n` # Parameterize the "true" reductions on the dimension gen.beta <- function(pj) { as.matrix((-1)^seq_len(pj)) } # the precision matrices are simply diag(pj) # sampling from the conditional matrix normal `X | Y = y ~ N(mu(y), I_{p1 p2})` sample.data <- function(sample.size, betas, Omegas, eta1 = 0) { # responce is a standard normal variable y <- rnorm(sample.size) # F(y) is identical to y, except its a tensor (last axis is sample axis) F <- array(y, dim = c(mapply(ncol, betas), sample.size)) # sample predictors from tensor normal X | Y = y (last axis is sample axis) sample.axis <- length(betas) + 1L Deltas <- Map(solve, Omegas) # normal covariances mu_y <- mlm(mlm(F, betas) + as.vector(eta1), Deltas) # conditional mean X <- mu_y + rtensornorm(sample.size, 0, Deltas, sample.axis) # responses X list(X = X, F = F, y = y, sample.axis = sample.axis) } # Open simulation CSV log file log.name <- format(Sys.time(), "sim_efficiency-%Y%m%dT%H%M.csv") log.file <- file(log.name, "w") # Counts new number of writes purely here to write the CSV header the first time log.writes <- 0L # Setting p1 = p2 = pj (note, in the paper `p = p1 p2`) mode.dims <- round(1.2^unique(round(logb(2:32, 1.2)))) for (pj in mode.dims) { betas.true <- list(gen.beta(pj), gen.beta(pj)) B.true <- kronecker(betas.true[[2]], betas.true[[1]]) Omegas.true <- list(diag(pj), diag(pj)) for (sample.size in sample.sizes) { sim <- sapply(seq_len(reps), function(.) { c(X, F, y, sample.axis) %<-% sample.data(sample.size, betas.true, Omegas.true) ds.lm <- tryCatch({ B.lm <- unname(lm.fit(t(`dim<-`(X, c(pj^2, sample.size))), drop(F))$coefficients) dist.subspace(B.true, B.lm, normalize = TRUE) }, error = function(.) NA) # c(., betas.vec, Omegas.vec) %<-% gmlm_tensor_normal(`dim<-`(X, c(pj^2, sample.size)), drop(F)) c(., betas.gmlm, Omegas.gmlm) %<-% gmlm_tensor_normal(X, F) c(., betas.mani, Omegas.mani) %<-% gmlm_tensor_normal(X, F, proj.Omegas = rep(list(function(O) { diag(mean(diag(O)), nrow(O)) }), 2) ) # ds.vec <- dist.subspace(B.true, betas.vec[[1]], normalize = TRUE) ds.vec <- NA ds.gmlm <- dist.subspace(betas.true, betas.gmlm, normalize = TRUE) # equiv to R> dist.subspace(B.true, B.gmlm) ds.mani <- dist.subspace(betas.true, betas.mani, normalize = TRUE) c(lm = ds.lm, vec = ds.vec, gmlm = ds.gmlm, mani = ds.mani) }) sim <- as.data.frame(t(sim)) sim$sample.size <- sample.size sim$pj <- pj # Append current simulation results to log-file write.table(sim, file = log.file, sep = ",", row.names = FALSE, col.names = (log.writes <- log.writes + 1L) < 2L ) # print progress cat(sprintf("mode dim (%d): %d/%d - sample size (%d): %d/%d\n", pj, which(pj == mode.dims), length(mode.dims), sample.size, which(sample.size == sample.sizes), length(sample.sizes) )) } } close(log.file) # Read simulation data back in sim <- read.csv(log.name) with(merge( aggregate(sim, . ~ sample.size + pj, mean, na.rm = TRUE, na.action = na.pass), aggregate(sim, . ~ sample.size + pj, sd, na.rm = TRUE, na.action = na.pass), by = c("sample.size", "pj"), suffixes = c("", ".sd"), all = FALSE ), { plot(range(pj), 0:1, type = "n", main = "Simulation -- Efficiency Gain", xlab = expression(tilde(p)), ylab = expression(d(B, hat(B))) ) for (sz in sort(unique(sample.size))) { i <- order(pj)[(sample.size == sz)[order(pj)]] i <- i[!(is.na(lm[i]) | is.na(lm.sd[i]))] polygon(c(pj[i], rev(pj[i])), c(lm[i] + lm.sd[i], rev(lm[i] - lm.sd[i])), col = paste0("#cf7d03", "50"), border = NA ) i <- order(pj)[(sample.size == sz)[order(pj)]] polygon(c(pj[i], rev(pj[i])), c(vec[i] + vec.sd[i], rev(vec[i] - vec.sd[i])), col = paste0("#b30303", "50"), border = NA ) polygon(c(pj[i], rev(pj[i])), c(gmlm[i] + gmlm.sd[i], rev(gmlm[i] - gmlm.sd[i])), col = paste0("#002d8d", "50"), border = NA ) polygon(c(pj[i], rev(pj[i])), c(mani[i] + mani.sd[i], rev(mani[i] - mani.sd[i])), col = paste0("#006e18", "50"), border = NA ) } lty.idx <- 1L for (sz in sort(unique(sample.size))) { i <- order(pj)[(sample.size == sz)[order(pj)]] lines(pj[i], lm[i], type = "b", pch = 16, col = "#cf7d03", lty = lty.idx, lwd = 2) lines(pj[i], vec[i], type = "b", pch = 16, col = "#b30303", lty = lty.idx, lwd = 2) lines(pj[i], gmlm[i], type = "b", pch = 16, col = "#002d8d", lty = lty.idx, lwd = 2) lines(pj[i], mani[i], type = "b", pch = 16, col = "#006e18", lty = lty.idx, lwd = 2) lty.idx <- lty.idx + 1L } })