library(tensorPredictors) setwd("~/Work/tensorPredictors/sim/") base.name <- format(Sys.time(), "sim_tsir-%Y%m%dT%H%M") # Source utility function used in most simulations (extracted for convenience) source("./sim_utils.R") # Set PRNG seed for reproducability # Sequence 'T', 'S', 'I', 'R' in ASCII is 84, 83, 73, 82. set.seed(84837382L, "Mersenne-Twister", "Inversion", "Rejection") ### Simulation configuration reps <- 100 # number of simulation replications sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n` signal.noise.ratios <- 2^(-3:4) # Signal to Noise Ratios (from 50/50 to very high) dimX <- c(2, 3, 5) # predictor `X` dimension dimF <- rep(2, length(dimX)) # "function" `F(y)` of responce `y` dimension # setup true model parameters eta1 <- 0 # rank 1 betas betas <- Map(function(nr, nc) { tcrossprod((-1)^seq_len(nr), (-1)^seq_len(nc)) }, dimX, dimF) # True (minimal) reduction matrix B.true <- Reduce(kronecker, rev(betas))[, 1L, drop = FALSE] # GMLM second moment parameters (mode-wise precition matrices) Omegas <- Map(function(pj) 0.5^abs(outer(1:pj, 1:pj, `-`)), dimX) # AR(0.5) # True (minimal) Gamma (Projection Direction) Gamma.true <- Reduce(kronecker, rev(Map(solve, Omegas, betas)))[, 1L, drop = FALSE] # true (full) covariance matrix covX.true <- Reduce(kronecker, rev(Map(solve, Omegas))) # define projections onto rank 1 matrices for betas proj.betas <- Map(.projRank, rep(1L, length(betas))) # data sampling routine sample.data <- function(sample.size, eta1, betas, Omegas, snr) { # responce is a standard normal variable y <- rnorm(sample.size) # F(y) is a tensor of monomials y.pow <- Reduce(function(a, b) outer(a, b, `+`), Map(seq, 0L, len = dimF)) F <- t(outer(y, as.vector(y.pow), `^`)) dim(F) <- c(dimF, sample.size) # sample predictors from tensor normal X | Y = y (last axis is sample axis) sample.axis <- length(betas) + 1L Deltas <- Map(solve, Omegas) # normal covariances mu_y <- mlm(mlm(F, betas) + as.vector(eta1), Deltas) # conditional mean noise <- rtensornorm(sample.size, 0, Deltas, sample.axis) # error term # scale noise to given signal to noise ratio snr.est <- sd(mu_y) / sd(noise) noise <- (snr.est / snr) * noise X <- mu_y + noise # responses X list(X = X, F = F, y = y, sample.axis = sample.axis) } # Create a CSV logger to save simulation results log.file <- paste(base.name, "csv", sep = ".") logger <- CSV.logger( file.name = log.file, header = c( "snr", "sample.size", "rep", "dist.subspace.gmlm", "dist.subspace.tsir", "dist.subspace.partial", "dist.subspace.gamma" ) ) # different Signal to Noise Ratios for (snr in signal.noise.ratios) { # simulation for multiple data set sizes for (sample.size in sample.sizes) { # simulation replications for (rep in seq_len(reps)) { # sample a data set c(X, F, y, sample.axis) %<-% sample.data(sample.size, eta1, betas, Omegas, snr) # call GMLM and TSIR fit.gmlm <- gmlm_tensor_normal(X, F, sample.axis = sample.axis, proj.betas = proj.betas) fit.tsir <- TSIR(X, y, c(1L, 1L, 1L), sample.axis = sample.axis) # GMLM, TSIR reduction estimates and TSIR (internal) projections B.gmlm <- Reduce(kronecker, Map(function(beta) qr.Q(qr(beta))[, 1L, drop = FALSE], rev(fit.gmlm$betas))) B.tsir <- Reduce(kronecker, rev(fit.tsir)) Gamma <- Reduce(kronecker, rev(attr(fit.tsir, "Gammas"))) # Subspace distances to true minimal reduction dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE) dist.subspace.tsir <- dist.subspace(B.true, B.tsir, normalize = TRUE) dist.subspace.partial <- dist.subspace(B.true, solve(covX.true, Gamma), normalize = TRUE) dist.subspace.gamma <- dist.subspace(Gamma.true, Gamma, normalize = TRUE) # Write to simulation log file (CSV file) logger() # and print progress cat(sprintf("SNR %.2f, sample size %d: rep: %d/%d\n", snr, sample.size, rep, reps)) } } } ### read simulation results generate plots if (!interactive()) { pdf(file = paste(base.name, "pdf", sep = ".")) } # Read siulation results from log file sim <- read.csv(log.file) # reset the correlation configuration parameter signal.noise.ratios <- sort(unique(sim$snr)) # build plot layout for every `snr` param ncols <- ceiling(sqrt(length(signal.noise.ratios))) nrows <- ceiling(length(signal.noise.ratios) / ncols) par(mfrow = c(nrows, ncols)) # One plot for every Singal to Noise Ratio for (.snr in signal.noise.ratios) { plot.sim(subset(sim, snr == .snr), "dist.subspace", main = sprintf("Signal to Noise Ratio: %.3f", .snr), xlab = "Sample Size", ylab = "Subspace Distance", cols = c(gmlm = "black", tsir = "#009E73", partial = "orange", gamma = "skyblue") ) }