tensor_predictors/sim/sim_1d_normal.R

library(tensorPredictors)
# library(RGCCA)
### Load modified version which _does not_ create a clusster in case of
### `n_cores == 1` allowing huge speed improvements! (at least on Ubuntu 22.04 LTS)
### Moreover, it is compatible with `Rscript`
### Also added `Encoding: UTF-8` in `DESCRIPTION`
devtools::load_all("~/Work/tensorPredictors/References/Software/TGCCA-modified", export_all = FALSE)


setwd("~/Work/tensorPredictors/sim/")
base.name <- format(Sys.time(), "sim_1d_normal-%Y%m%dT%H%M")

# Source utility function used in most simulations (extracted for convenience)
source("./sim_utils.R")

# Set PRNG seed for reproducability
# Note: `0x` is the HEX number prefix and the trailing `L` stands for "long"
# which is `R`s way if indicating an integer.
set.seed(0x1dL, "Mersenne-Twister", "Inversion", "Rejection")

### Simulation configuration
reps <- 100                     # number of simulation replications
sample.sizes <- c(100, 200, 300, 500, 750)  # sample sizes `n`
dimX <- c(2, 3, 5)              # predictor `X` dimension
dimF <- rep(2, length(dimX))    # "function" `F(y)` of responce `y` dimension

# setup true model parameters (all rank 1 betas)
betas <- Map(diag, 1, dimX, dimF)
Omegas <- Map(function(pj) 0.5^abs(outer(1:pj, 1:pj, `-`)), dimX)   # AR(0.5)
eta1 <- 0

# define projections onto tri-diagonal matrixes
proj.Omegas <- Map(.projBand, Map(dim, Omegas), 1L, 1L)
# and project Omegas
Omegas <- Map(do.call, proj.Omegas, Map(list, Omegas))

# data sampling routine
sample.data <- function(sample.size, eta1, betas, Omegas) {
    # responce is a standard normal variable
    y <- rnorm(sample.size)
    # F(y) is a tensor of monomials
    F <- sapply(y, function(yi) Reduce(outer, Map(`^`, yi, Map(seq, 0, len = dimF))))
    dim(F) <- c(dimF, sample.size)

    # sample predictors from tensor normal X | Y = y (last axis is sample axis)
    sample.axis <- length(betas) + 1L
    Deltas <- Map(solve, Omegas)                            # normal covariances
    mu_y <- mlm(mlm(F, betas) + as.vector(eta1), Deltas)    # conditional mean
    X <- mu_y + rtensornorm(sample.size, 0, Deltas, sample.axis)           # responses X

    list(X = X, F = F, y = y, sample.axis = sample.axis)
}

# Create a CSV logger to write simulation results to
log.file <- paste(base.name, "csv", sep = ".")
logger <- CSV.logger(
    file.name = log.file,
    header = c("sample.size", "rep", outer(
        c("time", "dist.subspace", "dist.projection"),  # measures
        c("gmlm", "pca", "hopca", "tsir", "mgcca"),     # methods
        paste, sep = "."
    ))
)

# compute true (full) model parameters to compair estimates against
B.true <- Reduce(`%x%`, rev(betas))

### for each sample size
for (sample.size in sample.sizes) {
    # repeate every simulation
    for (rep in seq_len(reps)) {
        # Sample training data
        c(X, F, y, sample.axis) %<-% sample.data(sample.size, eta1, betas, Omegas)

        # fit different models
        # Wrapped in try-catch clock to ensure the simulation continues,
        # if an error occures continue with nest resplication and log an error message
        try.catch.block <- tryCatch({
            time.gmlm <- system.time(
                fit.gmlm <- gmlm_tensor_normal(X, F, sample.axis = sample.axis,
                    proj.Omegas = proj.Omegas)
            )["user.self"]
            time.pca <- system.time(
                fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(dimF))
            )["user.self"]
            time.hopca <- system.time(
                fit.hopca <- HOPCA(X, npc = dimF, sample.axis = sample.axis)
            )["user.self"]
            time.tsir <- system.time(
                fit.tsir <- TSIR(X, y, dimF, sample.axis = sample.axis)
            )["user.self"]
            # `mgcca` expects the first axis to be the sample axis
            X1 <- aperm(X, c(sample.axis, seq_along(dim(X))[-sample.axis]))
            F1 <- cbind(y, y^2, y^3)
            time.mgcca <- system.time(
                fit.mgcca <- mgcca(
                    list(X1, F1),
                    quiet = TRUE,
                    scheme = "factorial",
                    ncomp = c(prod(dimF), 1L)
                )
            )["user.self"]
        }, error = print)

        # Drop comparison in case any error (in any fitting routine)
        if (inherits(try.catch.block, "error")) { next }

        # Compute true reduction matrix
        B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(betas)))
        B.pca <- fit.pca$rotation
        B.hopca <- Reduce(`%x%`, rev(fit.hopca))
        B.tsir <- Reduce(`%x%`, rev(fit.tsir))
        B.mgcca <- fit.mgcca$astar[[1]]

        # Subspace Distances: Normalized `|| P_A - P_B ||_F` where
        #   `P_A = A (A' A)^-1 A'` and the normalization means that with
        #   respect to the dimensions of `A, B` the subspace distance is in the
        #   range `[0, 1]`.
        dist.subspace.gmlm  <- dist.subspace(B.true, B.gmlm,  normalize = TRUE)
        dist.subspace.pca   <- dist.subspace(B.true, B.pca,   normalize = TRUE)
        dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
        dist.subspace.tsir  <- dist.subspace(B.true, B.tsir,  normalize = TRUE)
        dist.subspace.mgcca <- dist.subspace(B.true, B.mgcca, normalize = TRUE)

        # Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`.
        dist.projection.gmlm  <- dist.projection(B.true, B.gmlm)
        dist.projection.pca   <- dist.projection(B.true, B.pca)
        dist.projection.hopca <- dist.projection(B.true, B.hopca)
        dist.projection.tsir  <- dist.projection(B.true, B.tsir)
        dist.projection.mgcca <- dist.projection(B.true, B.mgcca)

        # Call CSV logger writing results to file
        logger()

        # print progress
        cat(sprintf("sample size (%d): %d/%d - rep: %d/%d\n",
            sample.size, which(sample.size == sample.sizes),
            length(sample.sizes), rep, reps))
    }
}


### read simulation results generate plots
if (!interactive()) { pdf(file = paste(base.name, "pdf", sep = ".")) }

sim <- read.csv(log.file)

plot.sim(sim, "dist.subspace", main = "Subspace Distance",
    xlab = "Sample Size", ylab = "Distance")

plot.sim(sim, "dist.projection", main = "Projection Distance",
    xlab = "Sample Size", ylab = "Distance")

plot.sim(sim, "time", main = "Runtime",
    xlab = "Sample Size", ylab = "Time [s]", ylim = c(0, 18))
add: tensor normal simulations 2023-11-21 11:25:35 +00:00			`library(tensorPredictors)`
			`# library(RGCCA)`
			`### Load modified version which _does not_ create a clusster in case of`
			### `n_cores == 1` allowing huge speed improvements! (at least on Ubuntu 22.04 LTS)
			### Moreover, it is compatible with `Rscript`
			### Also added `Encoding: UTF-8` in `DESCRIPTION`
			`devtools::load_all("~/Work/tensorPredictors/References/Software/TGCCA-modified", export_all = FALSE)`


			`setwd("~/Work/tensorPredictors/sim/")`
			`base.name <- format(Sys.time(), "sim_1d_normal-%Y%m%dT%H%M")`

			`# Source utility function used in most simulations (extracted for convenience)`
			`source("./sim_utils.R")`

			`# Set PRNG seed for reproducability`
			# Note: `0x` is the HEX number prefix and the trailing `L` stands for "long"
			# which is `R`s way if indicating an integer.
			`set.seed(0x1dL, "Mersenne-Twister", "Inversion", "Rejection")`

			`### Simulation configuration`
			`reps <- 100 # number of simulation replications`
			sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n`
			dimX <- c(2, 3, 5) # predictor `X` dimension
			dimF <- rep(2, length(dimX)) # "function" `F(y)` of responce `y` dimension

			`# setup true model parameters (all rank 1 betas)`
			`betas <- Map(diag, 1, dimX, dimF)`
			Omegas <- Map(function(pj) 0.5^abs(outer(1:pj, 1:pj, `-`)), dimX) # AR(0.5)
			`eta1 <- 0`

			`# define projections onto tri-diagonal matrixes`
			`proj.Omegas <- Map(.projBand, Map(dim, Omegas), 1L, 1L)`
			`# and project Omegas`
			`Omegas <- Map(do.call, proj.Omegas, Map(list, Omegas))`

			`# data sampling routine`
			`sample.data <- function(sample.size, eta1, betas, Omegas) {`
			`# responce is a standard normal variable`
			`y <- rnorm(sample.size)`
			`# F(y) is a tensor of monomials`
			F <- sapply(y, function(yi) Reduce(outer, Map(`^`, yi, Map(seq, 0, len = dimF))))
			`dim(F) <- c(dimF, sample.size)`

			`# sample predictors from tensor normal X \| Y = y (last axis is sample axis)`
			`sample.axis <- length(betas) + 1L`
			`Deltas <- Map(solve, Omegas) # normal covariances`
			`mu_y <- mlm(mlm(F, betas) + as.vector(eta1), Deltas) # conditional mean`
			`X <- mu_y + rtensornorm(sample.size, 0, Deltas, sample.axis) # responses X`

			`list(X = X, F = F, y = y, sample.axis = sample.axis)`
			`}`

			`# Create a CSV logger to write simulation results to`
			`log.file <- paste(base.name, "csv", sep = ".")`
			`logger <- CSV.logger(`
			`file.name = log.file,`
			`header = c("sample.size", "rep", outer(`
			`c("time", "dist.subspace", "dist.projection"), # measures`
			`c("gmlm", "pca", "hopca", "tsir", "mgcca"), # methods`
			`paste, sep = "."`
			`))`
			`)`

			`# compute true (full) model parameters to compair estimates against`
			B.true <- Reduce(`%x%`, rev(betas))

			`### for each sample size`
			`for (sample.size in sample.sizes) {`
			`# repeate every simulation`
			`for (rep in seq_len(reps)) {`
			`# Sample training data`
			`c(X, F, y, sample.axis) %<-% sample.data(sample.size, eta1, betas, Omegas)`

			`# fit different models`
			`# Wrapped in try-catch clock to ensure the simulation continues,`
			`# if an error occures continue with nest resplication and log an error message`
			`try.catch.block <- tryCatch({`
			`time.gmlm <- system.time(`
			`fit.gmlm <- gmlm_tensor_normal(X, F, sample.axis = sample.axis,`
			`proj.Omegas = proj.Omegas)`
			`)["user.self"]`
			`time.pca <- system.time(`
			`fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(dimF))`
			`)["user.self"]`
			`time.hopca <- system.time(`
			`fit.hopca <- HOPCA(X, npc = dimF, sample.axis = sample.axis)`
			`)["user.self"]`
			`time.tsir <- system.time(`
			`fit.tsir <- TSIR(X, y, dimF, sample.axis = sample.axis)`
			`)["user.self"]`
			# `mgcca` expects the first axis to be the sample axis
			`X1 <- aperm(X, c(sample.axis, seq_along(dim(X))[-sample.axis]))`
			`F1 <- cbind(y, y^2, y^3)`
			`time.mgcca <- system.time(`
			`fit.mgcca <- mgcca(`
			`list(X1, F1),`
			`quiet = TRUE,`
			`scheme = "factorial",`
			`ncomp = c(prod(dimF), 1L)`
			`)`
			`)["user.self"]`
			`}, error = print)`

			`# Drop comparison in case any error (in any fitting routine)`
			`if (inherits(try.catch.block, "error")) { next }`

			`# Compute true reduction matrix`
			B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(betas)))
			`B.pca <- fit.pca$rotation`
			B.hopca <- Reduce(`%x%`, rev(fit.hopca))
			B.tsir <- Reduce(`%x%`, rev(fit.tsir))
			`B.mgcca <- fit.mgcca$astar[[1]]`

			# Subspace Distances: Normalized `\|\| P_A - P_B \|\|_F` where
			# `P_A = A (A' A)^-1 A'` and the normalization means that with
			# respect to the dimensions of `A, B` the subspace distance is in the
			# range `[0, 1]`.
			`dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE)`
			`dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE)`
			`dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)`
			`dist.subspace.tsir <- dist.subspace(B.true, B.tsir, normalize = TRUE)`
			`dist.subspace.mgcca <- dist.subspace(B.true, B.mgcca, normalize = TRUE)`

			# Projection Distances: Spectral norm (2-norm) `\|\| P_A - P_B \|\|_2`.
			`dist.projection.gmlm <- dist.projection(B.true, B.gmlm)`
			`dist.projection.pca <- dist.projection(B.true, B.pca)`
			`dist.projection.hopca <- dist.projection(B.true, B.hopca)`
			`dist.projection.tsir <- dist.projection(B.true, B.tsir)`
			`dist.projection.mgcca <- dist.projection(B.true, B.mgcca)`

			`# Call CSV logger writing results to file`
			`logger()`

			`# print progress`
			`cat(sprintf("sample size (%d): %d/%d - rep: %d/%d\n",`
			`sample.size, which(sample.size == sample.sizes),`
			`length(sample.sizes), rep, reps))`
			`}`
			`}`




			`### read simulation results generate plots`
			`if (!interactive()) { pdf(file = paste(base.name, "pdf", sep = ".")) }`

			`sim <- read.csv(log.file)`

			`plot.sim(sim, "dist.subspace", main = "Subspace Distance",`
			`xlab = "Sample Size", ylab = "Distance")`

			`plot.sim(sim, "dist.projection", main = "Projection Distance",`
			`xlab = "Sample Size", ylab = "Distance")`

			`plot.sim(sim, "time", main = "Runtime",`
			`xlab = "Sample Size", ylab = "Time [s]", ylim = c(0, 18))`