128 lines
		
	
	
		
			5.0 KiB
		
	
	
	
		
			R
		
	
	
	
	
	
			
		
		
	
	
			128 lines
		
	
	
		
			5.0 KiB
		
	
	
	
		
			R
		
	
	
	
	
	
library(tensorPredictors)
 | 
						|
 | 
						|
setwd("~/Work/tensorPredictors/sim/")
 | 
						|
base.name <- format(Sys.time(), "sim_tsir-%Y%m%dT%H%M")
 | 
						|
 | 
						|
# Source utility function used in most simulations (extracted for convenience)
 | 
						|
source("./sim_utils.R")
 | 
						|
 | 
						|
# Set PRNG seed for reproducability
 | 
						|
# Sequence 'T', 'S', 'I', 'R' in ASCII is 84, 83, 73, 82.
 | 
						|
set.seed(84837382L, "Mersenne-Twister", "Inversion", "Rejection")
 | 
						|
 | 
						|
### Simulation configuration
 | 
						|
reps <- 100                     # number of simulation replications
 | 
						|
sample.sizes <- c(100, 200, 300, 500, 750)  # sample sizes `n`
 | 
						|
signal.noise.ratios <- 2^(-3:4) # Signal to Noise Ratios (from 50/50 to very high)
 | 
						|
dimX <- c(2, 3, 5)              # predictor `X` dimension
 | 
						|
dimF <- rep(2, length(dimX))    # "function" `F(y)` of responce `y` dimension
 | 
						|
 | 
						|
# setup true model parameters
 | 
						|
eta1 <- 0
 | 
						|
# rank 1 betas
 | 
						|
betas <- Map(function(nr, nc) {
 | 
						|
    tcrossprod((-1)^seq_len(nr), (-1)^seq_len(nc))
 | 
						|
}, dimX, dimF)
 | 
						|
# True (minimal) reduction matrix
 | 
						|
B.true <- Reduce(kronecker, rev(betas))[, 1L, drop = FALSE]
 | 
						|
# GMLM second moment parameters (mode-wise precition matrices)
 | 
						|
Omegas <- Map(function(pj) 0.5^abs(outer(1:pj, 1:pj, `-`)), dimX)   # AR(0.5)
 | 
						|
# True (minimal) Gamma (Projection Direction)
 | 
						|
Gamma.true <- Reduce(kronecker, rev(Map(solve, Omegas, betas)))[, 1L, drop = FALSE]
 | 
						|
# true (full) covariance matrix
 | 
						|
covX.true <- Reduce(kronecker, rev(Map(solve, Omegas)))
 | 
						|
 | 
						|
# define projections onto rank 1 matrices for betas
 | 
						|
proj.betas <- Map(.projRank, rep(1L, length(betas)))
 | 
						|
 | 
						|
# data sampling routine
 | 
						|
sample.data <- function(sample.size, eta1, betas, Omegas, snr) {
 | 
						|
    # responce is a standard normal variable
 | 
						|
    y <- rnorm(sample.size)
 | 
						|
    # F(y) is a tensor of monomials
 | 
						|
    y.pow <- Reduce(function(a, b) outer(a, b, `+`), Map(seq, 0L, len = dimF))
 | 
						|
    F <- t(outer(y, as.vector(y.pow), `^`))
 | 
						|
    dim(F) <- c(dimF, sample.size)
 | 
						|
 | 
						|
    # sample predictors from tensor normal X | Y = y (last axis is sample axis)
 | 
						|
    sample.axis <- length(betas) + 1L
 | 
						|
    Deltas <- Map(solve, Omegas)                                    # normal covariances
 | 
						|
    mu_y <- mlm(mlm(F, betas) + as.vector(eta1), Deltas)            # conditional mean
 | 
						|
    noise <- rtensornorm(sample.size, 0, Deltas, sample.axis)       # error term
 | 
						|
    # scale noise to given signal to noise ratio
 | 
						|
    snr.est <- sd(mu_y) / sd(noise)
 | 
						|
    noise <- (snr.est / snr) * noise
 | 
						|
    X <- mu_y + noise                                               # responses X
 | 
						|
 | 
						|
    list(X = X, F = F, y = y, sample.axis = sample.axis)
 | 
						|
}
 | 
						|
 | 
						|
# Create a CSV logger to save simulation results
 | 
						|
log.file <- paste(base.name, "csv", sep = ".")
 | 
						|
logger <- CSV.logger(
 | 
						|
    file.name = log.file,
 | 
						|
    header = c(
 | 
						|
        "snr", "sample.size", "rep",
 | 
						|
        "dist.subspace.gmlm", "dist.subspace.tsir", "dist.subspace.partial", "dist.subspace.gamma"
 | 
						|
    )
 | 
						|
)
 | 
						|
 | 
						|
# different Signal to Noise Ratios
 | 
						|
for (snr in signal.noise.ratios) {
 | 
						|
 | 
						|
    # simulation for multiple data set sizes
 | 
						|
    for (sample.size in sample.sizes) {
 | 
						|
 | 
						|
        # simulation replications
 | 
						|
        for (rep in seq_len(reps)) {
 | 
						|
 | 
						|
            # sample a data set
 | 
						|
            c(X, F, y, sample.axis) %<-% sample.data(sample.size, eta1, betas, Omegas, snr)
 | 
						|
 | 
						|
            # call GMLM and TSIR
 | 
						|
            fit.gmlm <- gmlm_tensor_normal(X, F, sample.axis = sample.axis, proj.betas = proj.betas)
 | 
						|
            fit.tsir <- TSIR(X, y, c(1L, 1L, 1L), sample.axis = sample.axis)
 | 
						|
 | 
						|
            # GMLM, TSIR reduction estimates and TSIR (internal) projections
 | 
						|
            B.gmlm <- Reduce(kronecker, Map(function(beta) qr.Q(qr(beta))[, 1L, drop = FALSE], rev(fit.gmlm$betas)))
 | 
						|
            B.tsir <- Reduce(kronecker, rev(fit.tsir))
 | 
						|
            Gamma <- Reduce(kronecker, rev(attr(fit.tsir, "Gammas")))
 | 
						|
 | 
						|
            # Subspace distances to true minimal reduction
 | 
						|
            dist.subspace.gmlm    <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
 | 
						|
            dist.subspace.tsir    <- dist.subspace(B.true, B.tsir, normalize = TRUE)
 | 
						|
            dist.subspace.partial <- dist.subspace(B.true, solve(covX.true, Gamma), normalize = TRUE)
 | 
						|
            dist.subspace.gamma   <- dist.subspace(Gamma.true, Gamma, normalize = TRUE)
 | 
						|
 | 
						|
            # Write to simulation log file (CSV file)
 | 
						|
            logger()
 | 
						|
 | 
						|
            # and print progress
 | 
						|
            cat(sprintf("SNR %.2f, sample size %d: rep: %d/%d\n", snr, sample.size, rep, reps))
 | 
						|
        }
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
### read simulation results generate plots
 | 
						|
if (!interactive()) { pdf(file = paste(base.name, "pdf", sep = ".")) }
 | 
						|
 | 
						|
# Read siulation results from log file
 | 
						|
sim <- read.csv(log.file)
 | 
						|
 | 
						|
# reset the correlation configuration parameter
 | 
						|
signal.noise.ratios <- sort(unique(sim$snr))
 | 
						|
 | 
						|
# build plot layout for every `snr` param
 | 
						|
ncols <- ceiling(sqrt(length(signal.noise.ratios)))
 | 
						|
nrows <- ceiling(length(signal.noise.ratios) / ncols)
 | 
						|
par(mfrow = c(nrows, ncols))
 | 
						|
 | 
						|
# One plot for every Singal to Noise Ratio
 | 
						|
for (.snr in signal.noise.ratios) {
 | 
						|
    plot.sim(subset(sim, snr == .snr), "dist.subspace",
 | 
						|
        main = sprintf("Signal to Noise Ratio: %.3f", .snr), xlab = "Sample Size", ylab = "Subspace Distance",
 | 
						|
        cols = c(gmlm = "black", tsir = "#009E73", partial = "orange", gamma = "skyblue")
 | 
						|
    )
 | 
						|
}
 |