186 lines
7.6 KiB
R
186 lines
7.6 KiB
R
library(tensorPredictors)
|
|
# library(RGCCA)
|
|
### Load modified version which _does not_ create a clusster in case of
|
|
### `n_cores == 1` allowing huge speed improvements! (at least on Ubuntu 22.04 LTS)
|
|
### Moreover, it is compatible with `Rscript`
|
|
### Also added `Encoding: UTF-8` in `DESCRIPTION`
|
|
devtools::load_all("~/Work/tensorPredictors/References/Software/TGCCA-modified", export_all = FALSE)
|
|
|
|
|
|
setwd("~/Work/tensorPredictors/sim/")
|
|
base.name <- format(Sys.time(), "sim_1b_normal-%Y%m%dT%H%M")
|
|
|
|
# Source utility function used in most simulations (extracted for convenience)
|
|
source("./sim_utils.R")
|
|
|
|
# Set PRNG seed for reproducability
|
|
# Note: `0x` is the HEX number prefix and the trailing `L` stands for "long"
|
|
# which is `R`s way if indicating an integer.
|
|
set.seed(0x1bL, "Mersenne-Twister", "Inversion", "Rejection")
|
|
|
|
### Simulation configuration
|
|
reps <- 100 # number of simulation replications
|
|
sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n`
|
|
validation.sizes <- 10000
|
|
dimX <- c(2, 3, 5) # predictor `X` dimension
|
|
dimF <- rep(2, length(dimX)) # "function" `F(y)` of responce `y` dimension
|
|
|
|
# setup true model parameters
|
|
betas <- Map(diag, 1, dimX, dimF)
|
|
Omegas <- Map(function(pj) 0.5^abs(outer(1:pj, 1:pj, `-`)), dimX) # AR(0.5)
|
|
eta1 <- 0
|
|
|
|
# data sampling routine
|
|
sample.data <- function(sample.size, eta1, betas, Omegas) {
|
|
# responce is a standard normal variable
|
|
y <- rnorm(sample.size)
|
|
# F(y) is a tensor of monomials
|
|
F <- sapply(y, function(yi) Reduce(outer, Map(`^`, yi, Map(seq, 0, len = dimF))))
|
|
dim(F) <- c(dimF, sample.size)
|
|
|
|
# sample predictors from tensor normal X | Y = y (last axis is sample axis)
|
|
sample.axis <- length(betas) + 1L
|
|
Deltas <- Map(solve, Omegas) # normal covariances
|
|
mu_y <- mlm(mlm(F, betas) + as.vector(eta1), Deltas) # conditional mean
|
|
X <- mu_y + rtensornorm(sample.size, 0, Deltas, sample.axis) # response
|
|
|
|
list(X = X, F = F, y = y, sample.axis = sample.axis)
|
|
}
|
|
|
|
# Create a CSV logger to write simulation results to
|
|
log.file <- paste(base.name, "csv", sep = ".")
|
|
logger <- CSV.logger(
|
|
file.name = log.file,
|
|
header = c("sample.size", "rep", outer(
|
|
c("dist.subspace", "dist.projection"), # measures
|
|
c("gmlm", "tsir", "hopca"), # methods
|
|
paste, sep = "."
|
|
), outer(
|
|
c("time", "dist.min.subspace", "dist.min.projection", "reconst.error"), # measures
|
|
c("gmlm", "pca", "hopca", "tsir", "mgcca"), # methods
|
|
paste, sep = "."
|
|
))
|
|
)
|
|
|
|
# compute true (full) model parameters to compair estimates against
|
|
B.true <- Reduce(`%x%`, rev(betas))
|
|
minimal <- function(B) { cbind(
|
|
"1" = B[, 1],
|
|
"y" = rowSums(B[, c(2, 3, 5)]),
|
|
"y^2" = rowSums(B[, c(4, 6, 7)]),
|
|
"y^3" = B[, 8]
|
|
) }
|
|
B.min.true <- minimal(B.true)
|
|
|
|
### for each sample size
|
|
for (sample.size in sample.sizes) {
|
|
# repeate every simulation
|
|
for (rep in seq_len(reps)) {
|
|
# Sample training data
|
|
c(X, F, y, sample.axis) %<-% sample.data(sample.size, eta1, betas, Omegas)
|
|
|
|
# fit different models
|
|
# Wrapped in try-catch clock to ensure the simulation continues,
|
|
# if an error occures continue with nest resplication and log an error message
|
|
tryCatch({
|
|
time.gmlm <- system.time(
|
|
fit.gmlm <- gmlm_tensor_normal(X, F, sample.axis = sample.axis)
|
|
)["user.self"]
|
|
time.pca <- system.time(
|
|
fit.pca <- prcomp(mat(X, sample.axis), rank. = 4)
|
|
)["user.self"]
|
|
time.hopca <- system.time(
|
|
fit.hopca <- HOPCA(X, npc = dimF, sample.axis = sample.axis)
|
|
)["user.self"]
|
|
time.tsir <- system.time(
|
|
fit.tsir <- TSIR(X, y, dimF, sample.axis = sample.axis)
|
|
)["user.self"]
|
|
# `mgcca` expects the first axis to be the sample axis
|
|
X.perm <- aperm(X, c(sample.axis, seq_along(dim(X))[-sample.axis]))
|
|
F.min <- mat(F, sample.axis)[, c(2, 4, 8)]
|
|
time.mgcca <- system.time(
|
|
fit.mgcca <- mgcca(
|
|
list(X.perm, F.min), # `drop` removes 1D axis
|
|
quiet = TRUE,
|
|
scheme = "factorial",
|
|
ncomp = c(4, 1)
|
|
)
|
|
)["user.self"]
|
|
}, error = function(ex) {
|
|
print(ex)
|
|
})
|
|
|
|
# Compute true reduction matrix
|
|
B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(betas)))
|
|
B.hopca <- Reduce(`%x%`, rev(fit.hopca))
|
|
B.tsir <- Reduce(`%x%`, rev(fit.tsir))
|
|
|
|
# and minimal true reductions if not already minimal
|
|
B.min.gmlm <- minimal(B.gmlm)
|
|
B.min.pca <- fit.pca$rotation
|
|
B.min.hopca <- B.hopca[, 1:4]
|
|
B.min.tsir <- La.svd(B.tsir, 4L, 0L)$u
|
|
B.min.mgcca <- fit.mgcca$astar[[1]]
|
|
|
|
|
|
# Subspace Distances: Normalized `|| P_A - P_B ||_F` where
|
|
# `P_A = A (A' A)^-1 A'` and the normalization means that with
|
|
# respect to the dimensions of `A, B` the subspace distance is in the
|
|
# range `[0, 1]`.
|
|
dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
|
|
dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
|
|
dist.subspace.tsir <- dist.subspace(B.true, B.tsir, normalize = TRUE)
|
|
|
|
dist.min.subspace.gmlm <- dist.subspace(B.min.true, B.min.gmlm, normalize = TRUE)
|
|
dist.min.subspace.pca <- dist.subspace(B.min.true, B.min.pca, normalize = TRUE)
|
|
dist.min.subspace.hopca <- dist.subspace(B.min.true, B.min.hopca, normalize = TRUE)
|
|
dist.min.subspace.tsir <- dist.subspace(B.min.true, B.min.tsir, normalize = TRUE)
|
|
dist.min.subspace.mgcca <- dist.subspace(B.min.true, B.min.mgcca, normalize = TRUE)
|
|
|
|
# Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`.
|
|
dist.projection.gmlm <- dist.projection(B.true, B.gmlm)
|
|
dist.projection.hopca <- dist.projection(B.true, B.hopca)
|
|
dist.projection.tsir <- dist.projection(B.true, B.tsir)
|
|
|
|
dist.min.projection.gmlm <- dist.projection(B.min.true, B.min.gmlm)
|
|
dist.min.projection.pca <- dist.projection(B.min.true, B.min.pca)
|
|
dist.min.projection.hopca <- dist.projection(B.min.true, B.min.hopca)
|
|
dist.min.projection.tsir <- dist.projection(B.min.true, B.min.tsir)
|
|
dist.min.projection.mgcca <- dist.projection(B.min.true, B.min.mgcca)
|
|
|
|
# # Reconstruction error (MSE) of y given X with a new sample
|
|
# c(X, F, y, sample.axis) %<-% sample.data(validation.sizes, eta1, betas, Omegas)
|
|
# y.gmlm <- rowMeans(mat(mlm(X, fit.gmlm$betas), sample.axis)[, c(2, 3, 5)])
|
|
|
|
|
|
# Call CSV logger writing results to file
|
|
logger()
|
|
|
|
# print progress
|
|
cat(sprintf("sample size (%d): %d/%d - rep: %d/%d\n",
|
|
sample.size, which(sample.size == sample.sizes),
|
|
length(sample.sizes), rep, reps))
|
|
}
|
|
}
|
|
|
|
|
|
### read simulation results and generate plots
|
|
if (!interactive()) { pdf(file = paste(base.name, "pdf", sep = ".")) }
|
|
|
|
sim <- read.csv(log.file)
|
|
|
|
plot.sim(sim, "dist.subspace", main = "Full Subspace Distance",
|
|
xlab = "Sample Size", ylab = "Distance")
|
|
|
|
plot.sim(sim, "dist.min.subspace", main = "Min Subspace Distance",
|
|
xlab = "Sample Size", ylab = "Distance")
|
|
|
|
plot.sim(sim, "dist.projection", main = "Full Projection Distance",
|
|
xlab = "Sample Size", ylab = "Distance")
|
|
|
|
plot.sim(sim, "dist.min.projection", main = "Min Projection Distance",
|
|
xlab = "Sample Size", ylab = "Distance")
|
|
|
|
plot.sim(sim, "time", main = "Runtime",
|
|
xlab = "Sample Size", ylab = "Time")
|