library(tensorPredictors) # library(RGCCA) ### Load modified version which _does not_ create a clusster in case of ### `n_cores == 1` allowing huge speed improvements! (at least on Ubuntu 22.04 LTS) ### Moreover, it is compatible with `Rscript` devtools::load_all("~/Work/tensorPredictors/References/Software/TGCCA-modified", export_all = FALSE) setwd("~/Work/tensorPredictors/sim/") base.name <- format(Sys.time(), "sim_1a_normal-%Y%m%dT%H%M") # Source utility function used in most simulations (extracted for convenience) source("./sim_utils.R") # Set PRNG seed for reproducability # Note: `0x` is the HEX number prefix and the trailing `L` stands for "long" # which is `R`s way if indicating an integer. set.seed(0x1aL, "Mersenne-Twister", "Inversion", "Rejection") ### Simulation configuration reps <- 100 # number of simulation replications sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n` dimX <- c(2, 3, 5) # predictor `X` dimension dimF <- rep(1, length(dimX)) # "function" `F(y)` of responce `y` dimension # setup true model parameters betas <- Map(diag, 1, dimX, dimF) Omegas <- Map(function(pj) 0.5^abs(outer(1:pj, 1:pj, `-`)), dimX) # AR(0.5) eta1 <- 0 # data sampling routine sample.data <- function(sample.size, eta1, betas, Omegas) { # responce is a standard normal variable y <- rnorm(sample.size) # F(y) is identical to y, except its a tensor (last axis is sample axis) F <- array(y, dim = c(mapply(ncol, betas), sample.size)) # sample predictors from tensor normal X | Y = y (last axis is sample axis) sample.axis <- length(betas) + 1L Deltas <- Map(solve, Omegas) # normal covariances mu_y <- mlm(mlm(F, betas) + as.vector(eta1), Deltas) # conditional mean X <- mu_y + rtensornorm(sample.size, 0, Deltas, sample.axis) # responses X list(X = X, F = F, y = y, sample.axis = sample.axis) } # Create a CSV logger to write simulation results to log.file <- paste(base.name, "csv", sep = ".") logger <- CSV.logger( file.name = log.file, header = c("sample.size", "rep", outer( c("time", "dist.subspace"), # measures c("gmlm", "pca", "hopca", "tsir", "mgcca", "hocca"), # methods paste, sep = "." ), "dist.subspace.init") ) # compute true (full) model parameters to compair estimates against B.true <- Reduce(`%x%`, rev(betas)) ### for each sample size for (sample.size in sample.sizes) { # repeate every simulation for (rep in seq_len(reps)) { # Sample training data c(X, F, y, sample.axis) %<-% sample.data(sample.size, eta1, betas, Omegas) # fit different models # Wrapped in try-catch clock to ensure the simulation continues, # if an error occures continue with nest resplication and log an error message tryCatch({ time.gmlm <- system.time( fit.gmlm <- gmlm_tensor_normal(X, F, sample.axis = sample.axis) )["user.self"] time.pca <- system.time( fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(dimF)) )["user.self"] time.hopca <- system.time( fit.hopca <- HOPCA(X, npc = dimF, sample.axis = sample.axis) )["user.self"] time.tsir <- system.time( fit.tsir <- TSIR(X, y, dimF, sample.axis = sample.axis) )["user.self"] # `mgcca` expects the first axis to be the sample axis X1 <- aperm(X, c(sample.axis, seq_along(dim(X))[-sample.axis])) F1 <- aperm(F, c(sample.axis, seq_along(dim(X))[-sample.axis])) time.mgcca <- system.time( fit.mgcca <- mgcca( list(X1, drop(F1)), # `drop` removes 1D axis (exception without) quiet = TRUE, scheme = "factorial", ncomp = rep(prod(dimF), 2), ranks = rep(prod(dimF), 2) ) )["user.self"] time.hocca <- system.time( fit.hocca <- HOCCA(X, F, sample.axis = sample.size) ) }, error = function(ex) { print(ex) }) # Compute true reduction matrix B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(betas))) B.init <- Reduce(`%x%`, rev(fit.gmlm$betas.init)) B.pca <- fit.pca$rotation B.hopca <- Reduce(`%x%`, rev(fit.hopca)) B.tsir <- Reduce(`%x%`, rev(fit.tsir)) B.mgcca <- fit.mgcca$astar[[1]] B.hocca <- Reduce(`%x%`, rev(fit.hocca$dirsX)) # Subspace Distances: Normalized `|| P_A - P_B ||_F` where # `P_A = A (A' A)^-1 A'` and the normalization means that with # respect to the dimensions of `A, B` the subspace distance is in the # range `[0, 1]`. dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE) dist.subspace.init <- dist.subspace(B.true, B.init, normalize = TRUE) dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE) dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE) dist.subspace.tsir <- dist.subspace(B.true, B.tsir, normalize = TRUE) dist.subspace.mgcca <- dist.subspace(B.true, B.mgcca, normalize = TRUE) dist.subspace.hocca <- dist.subspace(B.true, B.hocca, normalize = TRUE) # Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`. # Equiv to Subspace distance in 1D reduction case # Call CSV logger writing results to file logger() # print progress cat(sprintf("sample size (%d): %d/%d - rep: %d/%d\n", sample.size, which(sample.size == sample.sizes), length(sample.sizes), rep, reps)) } } ### read simulation results generate plots if (!interactive()) { pdf(file = paste(base.name, "pdf", sep = ".")) } sim <- read.csv(log.file) plot.sim(sim, "dist.subspace", main = "Subspace Distance", xlab = "Sample Size", ylab = "Distance") plot.sim(sim, "time", main = "Runtime", xlab = "Sample Size", ylab = "Time [s]", ylim = c(0, 18))