library(tensorPredictors)
# library(RGCCA)
### Load modified version which _does not_ create a cluster in case of
### `n_cores == 1`, allowing huge speed improvements! (at least on Ubuntu 22.04 LTS)
### Moreover, it is compatible with `Rscript`.
### Also added `Encoding: UTF-8` in `DESCRIPTION`.
devtools::load_all("~/Work/tensorPredictors/References/Software/TGCCA-modified", export_all = FALSE)
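# (Orientation note, assumption: the `mgcca()` call further down is expected to be
# provided by this locally modified TGCCA/RGCCA package loaded via `load_all`.)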


setwd("~/Work/tensorPredictors/sim/")
base.name <- format(Sys.time(), "sim_1b_normal-%Y%m%dT%H%M")

# Source utility function used in most simulations (extracted for convenience)
source("./sim_utils.R")

# Set PRNG seed for reproducibility
# Note: `0x` is the HEX number prefix and the trailing `L` stands for "long",
# which is `R`'s way of indicating an integer.
set.seed(0x1bL, "Mersenne-Twister", "Inversion", "Rejection")
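# (Illustration of the note above: `0x1b` is hexadecimal for 27, so
# `identical(0x1bL, 27L)` evaluates to `TRUE`.)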

### Simulation configuration
reps <- 100                     # number of simulation replications
sample.sizes <- c(100, 200, 300, 500, 750)  # sample sizes `n`
validation.sizes <- 10000       # validation sample size (for the reconstruction error)
dimX <- c(2, 3, 5)              # dimensions of the predictor tensor `X`
dimF <- rep(2, length(dimX))    # dimensions of the tensor `F(y)` of the response `y`

# setup true model parameters
betas <- Map(diag, 1, dimX, dimF)   # true reductions: p_k x q_k matrices with ones on the diagonal
Omegas <- Map(function(pj) 0.5^abs(outer(1:pj, 1:pj, `-`)), dimX)   # AR(0.5)
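# For illustration, the AR(0.5) matrix `0.5^abs(outer(1:3, 1:3, `-`))` for p_j = 3 is
#   [ 1.00  0.50  0.25 ]
#   [ 0.50  1.00  0.50 ]
#   [ 0.25  0.50  1.00 ]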
eta1 <- 0

# data sampling routine
sample.data <- function(sample.size, eta1, betas, Omegas) {
    # the response is a standard normal variable
    y <- rnorm(sample.size)
    # F(y) is a tensor of monomials
    F <- sapply(y, function(yi) Reduce(outer, Map(`^`, yi, Map(seq, 0, len = dimF))))
    dim(F) <- c(dimF, sample.size)

    # sample predictors from the tensor normal X | Y = y (last axis is the sample axis)
    sample.axis <- length(betas) + 1L
    Deltas <- Map(solve, Omegas)                            # normal covariances
    mu_y <- mlm(mlm(F, betas) + as.vector(eta1), Deltas)    # conditional mean
    X <- mu_y + rtensornorm(sample.size, 0, Deltas, sample.axis)    # predictors

    list(X = X, F = F, y = y, sample.axis = sample.axis)
}
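# Illustrative shape check (kept commented out so it does not advance the RNG state;
# `demo` is a hypothetical name): with `dimX = c(2, 3, 5)` and `dimF = c(2, 2, 2)`,
# a draw of size 10 should give a 2 x 3 x 5 x 10 predictor tensor and a
# 2 x 2 x 2 x 10 tensor of monomials.
# demo <- sample.data(10, eta1, betas, Omegas)
# stopifnot(all(dim(demo$X) == c(dimX, 10)), all(dim(demo$F) == c(dimF, 10)))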

# Create a CSV logger to write simulation results to
log.file <- paste(base.name, "csv", sep = ".")
logger <- CSV.logger(
    file.name = log.file,
    header = c("sample.size", "rep", outer(
            c("dist.subspace", "dist.projection"),                  # measures
            c("gmlm", "tsir", "hopca"),                             # methods
            paste, sep = "."
        ), outer(
            c("time", "dist.min.subspace", "dist.min.projection", "reconst.error"),  # measures
            c("gmlm", "pca", "hopca", "tsir", "mgcca"),             # methods
            paste, sep = "."
    ))
)
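# The `outer(..., paste, sep = ".")` calls above expand to column names of the
# form "<measure>.<method>", e.g. "dist.subspace.gmlm" or "time.mgcca".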

# compute true (full) model parameters to compare estimates against
B.true <- Reduce(`%x%`, rev(betas))
minimal <- function(B) { cbind(
    "1"   = B[, 1],
    "y"   = rowSums(B[, c(2, 3, 5)]),
    "y^2" = rowSums(B[, c(4, 6, 7)]),
    "y^3" = B[, 8]
) }
B.min.true <- minimal(B.true)
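# Note on the column selection in `minimal`: the 8 = prod(dimF) columns of `B.true`
# correspond to the Kronecker products of the per-mode monomials 1 and y, i.e. to
# the total degrees 0, 1, 1, 2, 1, 2, 2, 3. `minimal` collapses the duplicated
# degrees so that `B.min.true` has one column per monomial 1, y, y^2, y^3.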

### for each sample size
for (sample.size in sample.sizes) {
    # repeat every simulation
    for (rep in seq_len(reps)) {
        # Sample training data
        c(X, F, y, sample.axis) %<-% sample.data(sample.size, eta1, betas, Omegas)

        # fit different models
        # Wrapped in a try-catch block to ensure the simulation continues;
        # if an error occurs, continue with the next replication and log an error message
        tryCatch({
            time.gmlm <- system.time(
                fit.gmlm <- gmlm_tensor_normal(X, F, sample.axis = sample.axis)
            )["user.self"]
            time.pca <- system.time(
                fit.pca <- prcomp(mat(X, sample.axis), rank. = 4)
            )["user.self"]
            time.hopca <- system.time(
                fit.hopca <- HOPCA(X, npc = dimF, sample.axis = sample.axis)
            )["user.self"]
            time.tsir <- system.time(
                fit.tsir <- TSIR(X, y, dimF, sample.axis = sample.axis)
            )["user.self"]
            # `mgcca` expects the first axis to be the sample axis
            X.perm <- aperm(X, c(sample.axis, seq_along(dim(X))[-sample.axis]))
            F.min <- mat(F, sample.axis)[, c(2, 4, 8)]  # monomials y, y^2, y^3 (duplicates dropped)
            time.mgcca <- system.time(
                fit.mgcca <- mgcca(
                    list(X.perm, F.min),
                    quiet = TRUE,
                    scheme = "factorial",
                    ncomp = c(4, 1)
                )
            )["user.self"]
        }, error = function(ex) {
            print(ex)
        })

        # Compute the estimated (full) reduction matrices
        B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(betas)))
        B.hopca <- Reduce(`%x%`, rev(fit.hopca))
        B.tsir <- Reduce(`%x%`, rev(fit.tsir))

        # and the corresponding minimal reductions (if not already minimal)
        B.min.gmlm <- minimal(B.gmlm)
        B.min.pca <- fit.pca$rotation
        B.min.hopca <- B.hopca[, 1:4]
        B.min.tsir <- La.svd(B.tsir, 4L, 0L)$u     # first 4 left singular vectors
        B.min.mgcca <- fit.mgcca$astar[[1]]


        # Subspace Distances: Normalized `|| P_A - P_B ||_F` where
        #   `P_A = A (A' A)^-1 A'` and the normalization ensures that, regardless
        #   of the dimensions of `A, B`, the subspace distance lies in the
        #   range `[0, 1]`.
        dist.subspace.gmlm  <- dist.subspace(B.true, B.gmlm,  normalize = TRUE)
        dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
        dist.subspace.tsir  <- dist.subspace(B.true, B.tsir,  normalize = TRUE)

        dist.min.subspace.gmlm  <- dist.subspace(B.min.true, B.min.gmlm,  normalize = TRUE)
        dist.min.subspace.pca   <- dist.subspace(B.min.true, B.min.pca,   normalize = TRUE)
        dist.min.subspace.hopca <- dist.subspace(B.min.true, B.min.hopca, normalize = TRUE)
        dist.min.subspace.tsir  <- dist.subspace(B.min.true, B.min.tsir,  normalize = TRUE)
        dist.min.subspace.mgcca <- dist.subspace(B.min.true, B.min.mgcca, normalize = TRUE)

        # Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`.
        dist.projection.gmlm  <- dist.projection(B.true, B.gmlm)
        dist.projection.hopca <- dist.projection(B.true, B.hopca)
        dist.projection.tsir  <- dist.projection(B.true, B.tsir)

        dist.min.projection.gmlm  <- dist.projection(B.min.true, B.min.gmlm)
        dist.min.projection.pca   <- dist.projection(B.min.true, B.min.pca)
        dist.min.projection.hopca <- dist.projection(B.min.true, B.min.hopca)
        dist.min.projection.tsir  <- dist.projection(B.min.true, B.min.tsir)
        dist.min.projection.mgcca <- dist.projection(B.min.true, B.min.mgcca)

        # # Reconstruction error (MSE) of y given X with a new sample
        # c(X, F, y, sample.axis) %<-% sample.data(validation.sizes, eta1, betas, Omegas)
        # y.gmlm <- rowMeans(mat(mlm(X, fit.gmlm$betas), sample.axis)[, c(2, 3, 5)])


        # Call CSV logger writing results to file
        logger()

        # print progress
        cat(sprintf("sample size (%d): %d/%d - rep: %d/%d\n",
            sample.size, which(sample.size == sample.sizes),
            length(sample.sizes), rep, reps))
    }
}


### read simulation results and generate plots
if (!interactive()) { pdf(file = paste(base.name, "pdf", sep = ".")) }

sim <- read.csv(log.file)

plot.sim(sim, "dist.subspace", main = "Full Subspace Distance",
    xlab = "Sample Size", ylab = "Distance")

plot.sim(sim, "dist.min.subspace", main = "Min Subspace Distance",
    xlab = "Sample Size", ylab = "Distance")

plot.sim(sim, "dist.projection", main = "Full Projection Distance",
    xlab = "Sample Size", ylab = "Distance")

plot.sim(sim, "dist.min.projection", main = "Min Projection Distance",
    xlab = "Sample Size", ylab = "Distance")

plot.sim(sim, "time", main = "Runtime",
    xlab = "Sample Size", ylab = "Time")