add: tensor normal simulations
This commit is contained in:
parent
399f878fbb
commit
39555ec1fa
|
@ -108,8 +108,17 @@ simulations/
|
||||||
!**/LaTeX/*.bib
|
!**/LaTeX/*.bib
|
||||||
**/LaTeX/*-blx.bib
|
**/LaTeX/*-blx.bib
|
||||||
|
|
||||||
|
# Include subfolders for images and plots
|
||||||
|
!**/LaTeX/plots/
|
||||||
|
**/LaTeX/plots/*
|
||||||
|
!**/LaTeX/plots/*.tex
|
||||||
|
!**/LaTeX/images/
|
||||||
|
**/LaTeX/images/*
|
||||||
|
!**/LaTeX/images/*.tex
|
||||||
|
|
||||||
mlda_analysis/
|
mlda_analysis/
|
||||||
References/
|
References/
|
||||||
|
dataAnalysis/
|
||||||
*.csv
|
*.csv
|
||||||
*.csv.log
|
*.csv.log
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,148 @@
|
||||||
|
library(tensorPredictors)
|
||||||
|
# library(RGCCA)
|
||||||
|
### Load modified version which _does not_ create a clusster in case of
|
||||||
|
### `n_cores == 1` allowing huge speed improvements! (at least on Ubuntu 22.04 LTS)
|
||||||
|
### Moreover, it is compatible with `Rscript`
|
||||||
|
devtools::load_all("~/Work/tensorPredictors/References/Software/TGCCA-modified", export_all = FALSE)
|
||||||
|
|
||||||
|
|
||||||
|
setwd("~/Work/tensorPredictors/sim/")
|
||||||
|
base.name <- format(Sys.time(), "sim_1a_normal-%Y%m%dT%H%M")
|
||||||
|
|
||||||
|
# Source utility function used in most simulations (extracted for convenience)
|
||||||
|
source("./sim_utils.R")
|
||||||
|
|
||||||
|
# Set PRNG seed for reproducability
|
||||||
|
# Note: `0x` is the HEX number prefix and the trailing `L` stands for "long"
|
||||||
|
# which is `R`s way if indicating an integer.
|
||||||
|
set.seed(0x1aL, "Mersenne-Twister", "Inversion", "Rejection")
|
||||||
|
|
||||||
|
### Simulation configuration
|
||||||
|
reps <- 100 # number of simulation replications
|
||||||
|
sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n`
|
||||||
|
dimX <- c(2, 3, 5) # predictor `X` dimension
|
||||||
|
dimF <- rep(1, length(dimX)) # "function" `F(y)` of responce `y` dimension
|
||||||
|
|
||||||
|
# setup true model parameters
|
||||||
|
betas <- Map(diag, 1, dimX, dimF)
|
||||||
|
Omegas <- Map(function(pj) 0.5^abs(outer(1:pj, 1:pj, `-`)), dimX) # AR(0.5)
|
||||||
|
eta1 <- 0
|
||||||
|
|
||||||
|
# data sampling routine
|
||||||
|
sample.data <- function(sample.size, eta1, betas, Omegas) {
|
||||||
|
# responce is a standard normal variable
|
||||||
|
y <- rnorm(sample.size)
|
||||||
|
# F(y) is identical to y, except its a tensor (last axis is sample axis)
|
||||||
|
F <- array(y, dim = c(mapply(ncol, betas), sample.size))
|
||||||
|
|
||||||
|
# sample predictors from tensor normal X | Y = y (last axis is sample axis)
|
||||||
|
sample.axis <- length(betas) + 1L
|
||||||
|
Deltas <- Map(solve, Omegas) # normal covariances
|
||||||
|
mu_y <- mlm(mlm(F, betas) + as.vector(eta1), Deltas) # conditional mean
|
||||||
|
X <- mu_y + rtensornorm(sample.size, 0, Deltas, sample.axis) # responses X
|
||||||
|
|
||||||
|
list(X = X, F = F, y = y, sample.axis = sample.axis)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Create a CSV logger to write simulation results to
|
||||||
|
log.file <- paste(base.name, "csv", sep = ".")
|
||||||
|
logger <- CSV.logger(
|
||||||
|
file.name = log.file,
|
||||||
|
header = c("sample.size", "rep", outer(
|
||||||
|
c("time", "dist.subspace"), # measures
|
||||||
|
c("gmlm", "pca", "hopca", "tsir", "mgcca", "hocca"), # methods
|
||||||
|
paste, sep = "."
|
||||||
|
), "dist.subspace.init")
|
||||||
|
)
|
||||||
|
|
||||||
|
# compute true (full) model parameters to compair estimates against
|
||||||
|
B.true <- Reduce(`%x%`, rev(betas))
|
||||||
|
|
||||||
|
### for each sample size
|
||||||
|
for (sample.size in sample.sizes) {
|
||||||
|
# repeate every simulation
|
||||||
|
for (rep in seq_len(reps)) {
|
||||||
|
# Sample training data
|
||||||
|
c(X, F, y, sample.axis) %<-% sample.data(sample.size, eta1, betas, Omegas)
|
||||||
|
|
||||||
|
# fit different models
|
||||||
|
# Wrapped in try-catch clock to ensure the simulation continues,
|
||||||
|
# if an error occures continue with nest resplication and log an error message
|
||||||
|
tryCatch({
|
||||||
|
time.gmlm <- system.time(
|
||||||
|
fit.gmlm <- gmlm_tensor_normal(X, F, sample.axis = sample.axis)
|
||||||
|
)["user.self"]
|
||||||
|
time.pca <- system.time(
|
||||||
|
fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(dimF))
|
||||||
|
)["user.self"]
|
||||||
|
time.hopca <- system.time(
|
||||||
|
fit.hopca <- HOPCA(X, npc = dimF, sample.axis = sample.axis)
|
||||||
|
)["user.self"]
|
||||||
|
time.tsir <- system.time(
|
||||||
|
fit.tsir <- TSIR(X, y, dimF, sample.axis = sample.axis)
|
||||||
|
)["user.self"]
|
||||||
|
# `mgcca` expects the first axis to be the sample axis
|
||||||
|
X1 <- aperm(X, c(sample.axis, seq_along(dim(X))[-sample.axis]))
|
||||||
|
F1 <- aperm(F, c(sample.axis, seq_along(dim(X))[-sample.axis]))
|
||||||
|
time.mgcca <- system.time(
|
||||||
|
fit.mgcca <- mgcca(
|
||||||
|
list(X1, drop(F1)), # `drop` removes 1D axis (exception without)
|
||||||
|
quiet = TRUE,
|
||||||
|
scheme = "factorial",
|
||||||
|
ncomp = rep(prod(dimF), 2),
|
||||||
|
ranks = rep(prod(dimF), 2)
|
||||||
|
)
|
||||||
|
)["user.self"]
|
||||||
|
time.hocca <- system.time(
|
||||||
|
fit.hocca <- HOCCA(X, F, sample.axis = sample.size)
|
||||||
|
)
|
||||||
|
}, error = function(ex) {
|
||||||
|
print(ex)
|
||||||
|
})
|
||||||
|
|
||||||
|
# Compute true reduction matrix
|
||||||
|
B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(betas)))
|
||||||
|
B.init <- Reduce(`%x%`, rev(fit.gmlm$betas.init))
|
||||||
|
B.pca <- fit.pca$rotation
|
||||||
|
B.hopca <- Reduce(`%x%`, rev(fit.hopca))
|
||||||
|
B.tsir <- Reduce(`%x%`, rev(fit.tsir))
|
||||||
|
B.mgcca <- fit.mgcca$astar[[1]]
|
||||||
|
B.hocca <- Reduce(`%x%`, rev(fit.hocca$dirsX))
|
||||||
|
|
||||||
|
# Subspace Distances: Normalized `|| P_A - P_B ||_F` where
|
||||||
|
# `P_A = A (A' A)^-1 A'` and the normalization means that with
|
||||||
|
# respect to the dimensions of `A, B` the subspace distance is in the
|
||||||
|
# range `[0, 1]`.
|
||||||
|
dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
|
||||||
|
dist.subspace.init <- dist.subspace(B.true, B.init, normalize = TRUE)
|
||||||
|
dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE)
|
||||||
|
dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
|
||||||
|
dist.subspace.tsir <- dist.subspace(B.true, B.tsir, normalize = TRUE)
|
||||||
|
dist.subspace.mgcca <- dist.subspace(B.true, B.mgcca, normalize = TRUE)
|
||||||
|
dist.subspace.hocca <- dist.subspace(B.true, B.hocca, normalize = TRUE)
|
||||||
|
|
||||||
|
# Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`.
|
||||||
|
# Equiv to Subspace distance in 1D reduction case
|
||||||
|
|
||||||
|
# Call CSV logger writing results to file
|
||||||
|
logger()
|
||||||
|
|
||||||
|
# print progress
|
||||||
|
cat(sprintf("sample size (%d): %d/%d - rep: %d/%d\n",
|
||||||
|
sample.size, which(sample.size == sample.sizes),
|
||||||
|
length(sample.sizes), rep, reps))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
### read simulation results generate plots
|
||||||
|
if (!interactive()) { pdf(file = paste(base.name, "pdf", sep = ".")) }
|
||||||
|
|
||||||
|
sim <- read.csv(log.file)
|
||||||
|
|
||||||
|
plot.sim(sim, "dist.subspace", main = "Subspace Distance",
|
||||||
|
xlab = "Sample Size", ylab = "Distance")
|
||||||
|
|
||||||
|
plot.sim(sim, "time", main = "Runtime",
|
||||||
|
xlab = "Sample Size", ylab = "Time [s]", ylim = c(0, 18))
|
|
@ -0,0 +1,149 @@
|
||||||
|
library(tensorPredictors)
|
||||||
|
# library(RGCCA)
|
||||||
|
### Load modified version which _does not_ create a clusster in case of
|
||||||
|
### `n_cores == 1` allowing huge speed improvements! (at least on Ubuntu 22.04 LTS)
|
||||||
|
### Moreover, it is compatible with `Rscript`
|
||||||
|
### Also added `Encoding: UTF-8` in `DESCRIPTION`
|
||||||
|
devtools::load_all("~/Work/tensorPredictors/References/Software/TGCCA-modified", export_all = FALSE)
|
||||||
|
|
||||||
|
|
||||||
|
setwd("~/Work/tensorPredictors/sim/")
|
||||||
|
base.name <- format(Sys.time(), "sim_1b_normal-%Y%m%dT%H%M")
|
||||||
|
|
||||||
|
# Source utility function used in most simulations (extracted for convenience)
|
||||||
|
source("./sim_utils.R")
|
||||||
|
|
||||||
|
# Set PRNG seed for reproducability
|
||||||
|
# Note: `0x` is the HEX number prefix and the trailing `L` stands for "long"
|
||||||
|
# which is `R`s way if indicating an integer.
|
||||||
|
set.seed(0x1bL, "Mersenne-Twister", "Inversion", "Rejection")
|
||||||
|
|
||||||
|
### Simulation configuration
|
||||||
|
reps <- 100 # number of simulation replications
|
||||||
|
sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n`
|
||||||
|
dimX <- c(2, 3, 5) # predictor `X` dimension
|
||||||
|
dimF <- rep(2, length(dimX)) # "function" `F(y)` of responce `y` dimension
|
||||||
|
|
||||||
|
# setup true model parameters
|
||||||
|
betas <- Map(diag, 1, dimX, dimF)
|
||||||
|
Omegas <- Map(function(pj) 0.5^abs(outer(1:pj, 1:pj, `-`)), dimX) # AR(0.5)
|
||||||
|
eta1 <- 0
|
||||||
|
|
||||||
|
# data sampling routine
|
||||||
|
sample.data <- function(sample.size, eta1, betas, Omegas) {
|
||||||
|
# responce is a standard normal variable
|
||||||
|
y <- rnorm(sample.size)
|
||||||
|
# F(y) is a tensor of monomials
|
||||||
|
F <- sapply(y, function(yi) Reduce(outer, Map(`^`, yi, Map(seq, 0, len = dimF))))
|
||||||
|
dim(F) <- c(dimF, sample.size)
|
||||||
|
|
||||||
|
# sample predictors from tensor normal X | Y = y (last axis is sample axis)
|
||||||
|
sample.axis <- length(betas) + 1L
|
||||||
|
Deltas <- Map(solve, Omegas) # normal covariances
|
||||||
|
mu_y <- mlm(mlm(F, betas) + as.vector(eta1), Deltas) # conditional mean
|
||||||
|
X <- mu_y + rtensornorm(sample.size, 0, Deltas, sample.axis) # response
|
||||||
|
|
||||||
|
list(X = X, F = F, y = y, sample.axis = sample.axis)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Create a CSV logger to write simulation results to
|
||||||
|
log.file <- paste(base.name, "csv", sep = ".")
|
||||||
|
logger <- CSV.logger(
|
||||||
|
file.name = log.file,
|
||||||
|
header = c("sample.size", "rep", outer(
|
||||||
|
c("time", "dist.subspace", "dist.projection"), # measures
|
||||||
|
c("gmlm", "pca", "hopca", "tsir", "mgcca"), # methods
|
||||||
|
paste, sep = "."
|
||||||
|
))
|
||||||
|
)
|
||||||
|
|
||||||
|
# compute true (full) model parameters to compair estimates against
|
||||||
|
B.true <- Reduce(`%x%`, rev(betas))
|
||||||
|
|
||||||
|
### for each sample size
|
||||||
|
for (sample.size in sample.sizes) {
|
||||||
|
# repeate every simulation
|
||||||
|
for (rep in seq_len(reps)) {
|
||||||
|
# Sample training data
|
||||||
|
c(X, F, y, sample.axis) %<-% sample.data(sample.size, eta1, betas, Omegas)
|
||||||
|
|
||||||
|
# fit different models
|
||||||
|
# Wrapped in try-catch clock to ensure the simulation continues,
|
||||||
|
# if an error occures continue with nest resplication and log an error message
|
||||||
|
tryCatch({
|
||||||
|
time.gmlm <- system.time(
|
||||||
|
fit.gmlm <- gmlm_tensor_normal(X, F, sample.axis = sample.axis)
|
||||||
|
)["user.self"]
|
||||||
|
time.pca <- system.time(
|
||||||
|
fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(dimF))
|
||||||
|
)["user.self"]
|
||||||
|
time.hopca <- system.time(
|
||||||
|
fit.hopca <- HOPCA(X, npc = dimF, sample.axis = sample.axis)
|
||||||
|
)["user.self"]
|
||||||
|
time.tsir <- system.time(
|
||||||
|
fit.tsir <- TSIR(X, y, dimF, sample.axis = sample.axis)
|
||||||
|
)["user.self"]
|
||||||
|
# `mgcca` expects the first axis to be the sample axis
|
||||||
|
X1 <- aperm(X, c(sample.axis, seq_along(dim(X))[-sample.axis]))
|
||||||
|
F1 <- aperm(F, c(sample.axis, seq_along(dim(X))[-sample.axis]))
|
||||||
|
time.mgcca <- system.time(
|
||||||
|
fit.mgcca <- mgcca(
|
||||||
|
list(X1, drop(F1)), # `drop` removes 1D axis
|
||||||
|
quiet = TRUE,
|
||||||
|
scheme = "factorial",
|
||||||
|
ncomp = rep(prod(dimF), 2)
|
||||||
|
)
|
||||||
|
)["user.self"]
|
||||||
|
}, error = function(ex) {
|
||||||
|
print(ex)
|
||||||
|
})
|
||||||
|
|
||||||
|
# Compute true reduction matrix
|
||||||
|
B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(betas)))
|
||||||
|
B.pca <- fit.pca$rotation
|
||||||
|
B.hopca <- Reduce(`%x%`, rev(fit.hopca))
|
||||||
|
B.tsir <- Reduce(`%x%`, rev(fit.tsir))
|
||||||
|
B.mgcca <- fit.mgcca$astar[[1]]
|
||||||
|
|
||||||
|
# Subspace Distances: Normalized `|| P_A - P_B ||_F` where
|
||||||
|
# `P_A = A (A' A)^-1 A'` and the normalization means that with
|
||||||
|
# respect to the dimensions of `A, B` the subspace distance is in the
|
||||||
|
# range `[0, 1]`.
|
||||||
|
dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
|
||||||
|
dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE)
|
||||||
|
dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
|
||||||
|
dist.subspace.tsir <- dist.subspace(B.true, B.tsir, normalize = TRUE)
|
||||||
|
dist.subspace.mgcca <- dist.subspace(B.true, B.mgcca, normalize = TRUE)
|
||||||
|
|
||||||
|
# Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`.
|
||||||
|
dist.projection.gmlm <- dist.projection(B.true, B.gmlm)
|
||||||
|
dist.projection.pca <- dist.projection(B.true, B.pca)
|
||||||
|
dist.projection.hopca <- dist.projection(B.true, B.hopca)
|
||||||
|
dist.projection.tsir <- dist.projection(B.true, B.tsir)
|
||||||
|
dist.projection.mgcca <- dist.projection(B.true, B.mgcca)
|
||||||
|
|
||||||
|
# Call CSV logger writing results to file
|
||||||
|
logger()
|
||||||
|
|
||||||
|
# print progress
|
||||||
|
cat(sprintf("sample size (%d): %d/%d - rep: %d/%d\n",
|
||||||
|
sample.size, which(sample.size == sample.sizes),
|
||||||
|
length(sample.sizes), rep, reps))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
### read simulation results generate plots
|
||||||
|
if (!interactive()) { pdf(file = paste(base.name, "pdf", sep = ".")) }
|
||||||
|
|
||||||
|
sim <- read.csv(log.file)
|
||||||
|
|
||||||
|
plot.sim(sim, "dist.subspace", main = "Subspace Distance",
|
||||||
|
xlab = "Sample Size", ylab = "Distance")
|
||||||
|
|
||||||
|
plot.sim(sim, "dist.projection", main = "Projection Distance",
|
||||||
|
xlab = "Sample Size", ylab = "Distance")
|
||||||
|
|
||||||
|
plot.sim(sim, "time", main = "Runtime",
|
||||||
|
xlab = "Sample Size", ylab = "Time [s]", ylim = c(0, 18))
|
|
@ -0,0 +1,185 @@
|
||||||
|
library(tensorPredictors)
|
||||||
|
# library(RGCCA)
|
||||||
|
### Load modified version which _does not_ create a clusster in case of
|
||||||
|
### `n_cores == 1` allowing huge speed improvements! (at least on Ubuntu 22.04 LTS)
|
||||||
|
### Moreover, it is compatible with `Rscript`
|
||||||
|
### Also added `Encoding: UTF-8` in `DESCRIPTION`
|
||||||
|
devtools::load_all("~/Work/tensorPredictors/References/Software/TGCCA-modified", export_all = FALSE)
|
||||||
|
|
||||||
|
|
||||||
|
setwd("~/Work/tensorPredictors/sim/")
|
||||||
|
base.name <- format(Sys.time(), "sim_1b_normal-%Y%m%dT%H%M")
|
||||||
|
|
||||||
|
# Source utility function used in most simulations (extracted for convenience)
|
||||||
|
source("./sim_utils.R")
|
||||||
|
|
||||||
|
# Set PRNG seed for reproducability
|
||||||
|
# Note: `0x` is the HEX number prefix and the trailing `L` stands for "long"
|
||||||
|
# which is `R`s way if indicating an integer.
|
||||||
|
set.seed(0x1bL, "Mersenne-Twister", "Inversion", "Rejection")
|
||||||
|
|
||||||
|
### Simulation configuration
|
||||||
|
reps <- 100 # number of simulation replications
|
||||||
|
sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n`
|
||||||
|
validation.sizes <- 10000
|
||||||
|
dimX <- c(2, 3, 5) # predictor `X` dimension
|
||||||
|
dimF <- rep(2, length(dimX)) # "function" `F(y)` of responce `y` dimension
|
||||||
|
|
||||||
|
# setup true model parameters
|
||||||
|
betas <- Map(diag, 1, dimX, dimF)
|
||||||
|
Omegas <- Map(function(pj) 0.5^abs(outer(1:pj, 1:pj, `-`)), dimX) # AR(0.5)
|
||||||
|
eta1 <- 0
|
||||||
|
|
||||||
|
# data sampling routine
|
||||||
|
sample.data <- function(sample.size, eta1, betas, Omegas) {
|
||||||
|
# responce is a standard normal variable
|
||||||
|
y <- rnorm(sample.size)
|
||||||
|
# F(y) is a tensor of monomials
|
||||||
|
F <- sapply(y, function(yi) Reduce(outer, Map(`^`, yi, Map(seq, 0, len = dimF))))
|
||||||
|
dim(F) <- c(dimF, sample.size)
|
||||||
|
|
||||||
|
# sample predictors from tensor normal X | Y = y (last axis is sample axis)
|
||||||
|
sample.axis <- length(betas) + 1L
|
||||||
|
Deltas <- Map(solve, Omegas) # normal covariances
|
||||||
|
mu_y <- mlm(mlm(F, betas) + as.vector(eta1), Deltas) # conditional mean
|
||||||
|
X <- mu_y + rtensornorm(sample.size, 0, Deltas, sample.axis) # response
|
||||||
|
|
||||||
|
list(X = X, F = F, y = y, sample.axis = sample.axis)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Create a CSV logger to write simulation results to
|
||||||
|
log.file <- paste(base.name, "csv", sep = ".")
|
||||||
|
logger <- CSV.logger(
|
||||||
|
file.name = log.file,
|
||||||
|
header = c("sample.size", "rep", outer(
|
||||||
|
c("dist.subspace", "dist.projection"), # measures
|
||||||
|
c("gmlm", "tsir", "hopca"), # methods
|
||||||
|
paste, sep = "."
|
||||||
|
), outer(
|
||||||
|
c("time", "dist.min.subspace", "dist.min.projection", "reconst.error"), # measures
|
||||||
|
c("gmlm", "pca", "hopca", "tsir", "mgcca"), # methods
|
||||||
|
paste, sep = "."
|
||||||
|
))
|
||||||
|
)
|
||||||
|
|
||||||
|
# compute true (full) model parameters to compair estimates against
|
||||||
|
B.true <- Reduce(`%x%`, rev(betas))
|
||||||
|
minimal <- function(B) { cbind(
|
||||||
|
"1" = B[, 1],
|
||||||
|
"y" = rowSums(B[, c(2, 3, 5)]),
|
||||||
|
"y^2" = rowSums(B[, c(4, 6, 7)]),
|
||||||
|
"y^3" = B[, 8]
|
||||||
|
) }
|
||||||
|
B.min.true <- minimal(B.true)
|
||||||
|
|
||||||
|
### for each sample size
|
||||||
|
for (sample.size in sample.sizes) {
|
||||||
|
# repeate every simulation
|
||||||
|
for (rep in seq_len(reps)) {
|
||||||
|
# Sample training data
|
||||||
|
c(X, F, y, sample.axis) %<-% sample.data(sample.size, eta1, betas, Omegas)
|
||||||
|
|
||||||
|
# fit different models
|
||||||
|
# Wrapped in try-catch clock to ensure the simulation continues,
|
||||||
|
# if an error occures continue with nest resplication and log an error message
|
||||||
|
tryCatch({
|
||||||
|
time.gmlm <- system.time(
|
||||||
|
fit.gmlm <- gmlm_tensor_normal(X, F, sample.axis = sample.axis)
|
||||||
|
)["user.self"]
|
||||||
|
time.pca <- system.time(
|
||||||
|
fit.pca <- prcomp(mat(X, sample.axis), rank. = 4)
|
||||||
|
)["user.self"]
|
||||||
|
time.hopca <- system.time(
|
||||||
|
fit.hopca <- HOPCA(X, npc = dimF, sample.axis = sample.axis)
|
||||||
|
)["user.self"]
|
||||||
|
time.tsir <- system.time(
|
||||||
|
fit.tsir <- TSIR(X, y, dimF, sample.axis = sample.axis)
|
||||||
|
)["user.self"]
|
||||||
|
# `mgcca` expects the first axis to be the sample axis
|
||||||
|
X.perm <- aperm(X, c(sample.axis, seq_along(dim(X))[-sample.axis]))
|
||||||
|
F.min <- mat(F, sample.axis)[, c(2, 4, 8)]
|
||||||
|
time.mgcca <- system.time(
|
||||||
|
fit.mgcca <- mgcca(
|
||||||
|
list(X.perm, F.min), # `drop` removes 1D axis
|
||||||
|
quiet = TRUE,
|
||||||
|
scheme = "factorial",
|
||||||
|
ncomp = c(4, 1)
|
||||||
|
)
|
||||||
|
)["user.self"]
|
||||||
|
}, error = function(ex) {
|
||||||
|
print(ex)
|
||||||
|
})
|
||||||
|
|
||||||
|
# Compute true reduction matrix
|
||||||
|
B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(betas)))
|
||||||
|
B.hopca <- Reduce(`%x%`, rev(fit.hopca))
|
||||||
|
B.tsir <- Reduce(`%x%`, rev(fit.tsir))
|
||||||
|
|
||||||
|
# and minimal true reductions if not already minimal
|
||||||
|
B.min.gmlm <- minimal(B.gmlm)
|
||||||
|
B.min.pca <- fit.pca$rotation
|
||||||
|
B.min.hopca <- B.hopca[, 1:4]
|
||||||
|
B.min.tsir <- La.svd(B.tsir, 4L, 0L)$u
|
||||||
|
B.min.mgcca <- fit.mgcca$astar[[1]]
|
||||||
|
|
||||||
|
|
||||||
|
# Subspace Distances: Normalized `|| P_A - P_B ||_F` where
|
||||||
|
# `P_A = A (A' A)^-1 A'` and the normalization means that with
|
||||||
|
# respect to the dimensions of `A, B` the subspace distance is in the
|
||||||
|
# range `[0, 1]`.
|
||||||
|
dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
|
||||||
|
dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
|
||||||
|
dist.subspace.tsir <- dist.subspace(B.true, B.tsir, normalize = TRUE)
|
||||||
|
|
||||||
|
dist.min.subspace.gmlm <- dist.subspace(B.min.true, B.min.gmlm, normalize = TRUE)
|
||||||
|
dist.min.subspace.pca <- dist.subspace(B.min.true, B.min.pca, normalize = TRUE)
|
||||||
|
dist.min.subspace.hopca <- dist.subspace(B.min.true, B.min.hopca, normalize = TRUE)
|
||||||
|
dist.min.subspace.tsir <- dist.subspace(B.min.true, B.min.tsir, normalize = TRUE)
|
||||||
|
dist.min.subspace.mgcca <- dist.subspace(B.min.true, B.min.mgcca, normalize = TRUE)
|
||||||
|
|
||||||
|
# Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`.
|
||||||
|
dist.projection.gmlm <- dist.projection(B.true, B.gmlm)
|
||||||
|
dist.projection.hopca <- dist.projection(B.true, B.hopca)
|
||||||
|
dist.projection.tsir <- dist.projection(B.true, B.tsir)
|
||||||
|
|
||||||
|
dist.min.projection.gmlm <- dist.projection(B.min.true, B.min.gmlm)
|
||||||
|
dist.min.projection.pca <- dist.projection(B.min.true, B.min.pca)
|
||||||
|
dist.min.projection.hopca <- dist.projection(B.min.true, B.min.hopca)
|
||||||
|
dist.min.projection.tsir <- dist.projection(B.min.true, B.min.tsir)
|
||||||
|
dist.min.projection.mgcca <- dist.projection(B.min.true, B.min.mgcca)
|
||||||
|
|
||||||
|
# # Reconstruction error (MSE) of y given X with a new sample
|
||||||
|
# c(X, F, y, sample.axis) %<-% sample.data(validation.sizes, eta1, betas, Omegas)
|
||||||
|
# y.gmlm <- rowMeans(mat(mlm(X, fit.gmlm$betas), sample.axis)[, c(2, 3, 5)])
|
||||||
|
|
||||||
|
|
||||||
|
# Call CSV logger writing results to file
|
||||||
|
logger()
|
||||||
|
|
||||||
|
# print progress
|
||||||
|
cat(sprintf("sample size (%d): %d/%d - rep: %d/%d\n",
|
||||||
|
sample.size, which(sample.size == sample.sizes),
|
||||||
|
length(sample.sizes), rep, reps))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
### read simulation results and generate plots
|
||||||
|
if (!interactive()) { pdf(file = paste(base.name, "pdf", sep = ".")) }
|
||||||
|
|
||||||
|
sim <- read.csv(log.file)
|
||||||
|
|
||||||
|
plot.sim(sim, "dist.subspace", main = "Full Subspace Distance",
|
||||||
|
xlab = "Sample Size", ylab = "Distance")
|
||||||
|
|
||||||
|
plot.sim(sim, "dist.min.subspace", main = "Min Subspace Distance",
|
||||||
|
xlab = "Sample Size", ylab = "Distance")
|
||||||
|
|
||||||
|
plot.sim(sim, "dist.projection", main = "Full Projection Distance",
|
||||||
|
xlab = "Sample Size", ylab = "Distance")
|
||||||
|
|
||||||
|
plot.sim(sim, "dist.min.projection", main = "Min Projection Distance",
|
||||||
|
xlab = "Sample Size", ylab = "Distance")
|
||||||
|
|
||||||
|
plot.sim(sim, "time", main = "Runtime",
|
||||||
|
xlab = "Sample Size", ylab = "Time")
|
|
@ -0,0 +1,152 @@
|
||||||
|
library(tensorPredictors)
|
||||||
|
# library(RGCCA)
|
||||||
|
### Load modified version which _does not_ create a clusster in case of
|
||||||
|
### `n_cores == 1` allowing huge speed improvements! (at least on Ubuntu 22.04 LTS)
|
||||||
|
### Moreover, it is compatible with `Rscript`
|
||||||
|
### Also added `Encoding: UTF-8` in `DESCRIPTION`
|
||||||
|
devtools::load_all("~/Work/tensorPredictors/References/Software/TGCCA-modified", export_all = FALSE)
|
||||||
|
|
||||||
|
|
||||||
|
setwd("~/Work/tensorPredictors/sim/")
|
||||||
|
base.name <- format(Sys.time(), "sim_1c_normal-%Y%m%dT%H%M")
|
||||||
|
|
||||||
|
# Source utility function used in most simulations (extracted for convenience)
|
||||||
|
source("./sim_utils.R")
|
||||||
|
|
||||||
|
# Set PRNG seed for reproducability
|
||||||
|
# Note: `0x` is the HEX number prefix and the trailing `L` stands for "long"
|
||||||
|
# which is `R`s way if indicating an integer.
|
||||||
|
set.seed(0x1cL, "Mersenne-Twister", "Inversion", "Rejection")
|
||||||
|
|
||||||
|
### Simulation configuration
|
||||||
|
reps <- 100 # number of simulation replications
|
||||||
|
sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n`
|
||||||
|
dimX <- c(2, 3, 5) # predictor `X` dimension
|
||||||
|
dimF <- rep(2, length(dimX)) # "function" `F(y)` of responce `y` dimension
|
||||||
|
|
||||||
|
# setup true model parameters (rank 1 betas)
|
||||||
|
betas <- Map(function(nr, nc) {
|
||||||
|
tcrossprod((-1)^seq_len(nr), (-1)^seq_len(nc))
|
||||||
|
}, dimX, dimF)
|
||||||
|
Omegas <- Map(function(pj) 0.5^abs(outer(1:pj, 1:pj, `-`)), dimX) # AR(0.5)
|
||||||
|
eta1 <- 0
|
||||||
|
|
||||||
|
# define projections onto rank 1 matrices for betas
|
||||||
|
proj.betas <- Map(.projRank, rep(1L, length(betas)))
|
||||||
|
|
||||||
|
# data sampling routine
|
||||||
|
sample.data <- function(sample.size, eta1, betas, Omegas) {
|
||||||
|
# responce is a standard normal variable
|
||||||
|
y <- rnorm(sample.size)
|
||||||
|
# F(y) is a tensor of monomials
|
||||||
|
y.pow <- Reduce(function(a, b) outer(a, b, `+`), Map(seq, 0L, len = dimF))
|
||||||
|
F <- t(outer(y, as.vector(y.pow), `^`))
|
||||||
|
dim(F) <- c(dimF, sample.size)
|
||||||
|
|
||||||
|
# sample predictors from tensor normal X | Y = y (last axis is sample axis)
|
||||||
|
sample.axis <- length(betas) + 1L
|
||||||
|
Deltas <- Map(solve, Omegas) # normal covariances
|
||||||
|
mu_y <- mlm(mlm(F, betas) + as.vector(eta1), Deltas) # conditional mean
|
||||||
|
X <- mu_y + rtensornorm(sample.size, 0, Deltas, sample.axis) # responses X
|
||||||
|
|
||||||
|
list(X = X, F = F, y = y, sample.axis = sample.axis)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Create a CSV logger to write simulation results to
|
||||||
|
log.file <- paste(base.name, "csv", sep = ".")
|
||||||
|
logger <- CSV.logger(
|
||||||
|
file.name = log.file,
|
||||||
|
header = c("sample.size", "rep", outer(
|
||||||
|
c("time", "dist.subspace"), # measures
|
||||||
|
c("gmlm", "pca", "hopca", "tsir", "mgcca"), # methods
|
||||||
|
paste, sep = "."
|
||||||
|
))
|
||||||
|
)
|
||||||
|
|
||||||
|
# compute true (full) model parameters to compair estimates against
|
||||||
|
B.true <- Reduce(`%x%`, rev(betas))[, 1L, drop = FALSE]
|
||||||
|
|
||||||
|
### for each sample size
|
||||||
|
for (sample.size in sample.sizes) {
|
||||||
|
# repeate every simulation
|
||||||
|
for (rep in seq_len(reps)) {
|
||||||
|
# Sample training data
|
||||||
|
c(X, F, y, sample.axis) %<-% sample.data(sample.size, eta1, betas, Omegas)
|
||||||
|
|
||||||
|
# fit different models
|
||||||
|
# Wrapped in try-catch clock to ensure the simulation continues,
|
||||||
|
# if an error occures continue with nest resplication and log an error message
|
||||||
|
tryCatch({
|
||||||
|
time.gmlm <- system.time(
|
||||||
|
fit.gmlm <- gmlm_tensor_normal(X, F, sample.axis = sample.axis,
|
||||||
|
proj.betas = proj.betas)
|
||||||
|
)["user.self"]
|
||||||
|
time.pca <- system.time(
|
||||||
|
fit.pca <- prcomp(mat(X, sample.axis), rank. = 1L)
|
||||||
|
)["user.self"]
|
||||||
|
time.hopca <- system.time(
|
||||||
|
fit.hopca <- HOPCA(X, npc = c(1L, 1L, 1L), sample.axis = sample.axis)
|
||||||
|
)["user.self"]
|
||||||
|
time.tsir <- system.time(
|
||||||
|
fit.tsir <- TSIR(X, y, d = c(1L, 1L, 1L), sample.axis = sample.axis)
|
||||||
|
)["user.self"]
|
||||||
|
# `mgcca` expects the first axis to be the sample axis
|
||||||
|
X1 <- aperm(X, c(sample.axis, seq_along(dim(X))[-sample.axis]))
|
||||||
|
F1 <- cbind(y, y^2, y^3)
|
||||||
|
time.mgcca <- system.time(
|
||||||
|
fit.mgcca <- mgcca(
|
||||||
|
list(X1, F1), # `drop` removes 1D axis
|
||||||
|
quiet = TRUE,
|
||||||
|
scheme = "factorial",
|
||||||
|
ncomp = c(1L, 1L)
|
||||||
|
)
|
||||||
|
)["user.self"]
|
||||||
|
}, error = function(ex) {
|
||||||
|
print(ex)
|
||||||
|
})
|
||||||
|
|
||||||
|
# Compute true reduction matrix
|
||||||
|
B.gmlm <- Reduce(kronecker, Map(
|
||||||
|
function(beta) qr.Q(qr(beta))[, 1L, drop = FALSE],
|
||||||
|
rev(fit.gmlm$betas)))
|
||||||
|
B.pca <- fit.pca$rotation
|
||||||
|
B.hopca <- Reduce(`%x%`, rev(fit.hopca))
|
||||||
|
B.tsir <- Reduce(`%x%`, rev(fit.tsir))
|
||||||
|
B.mgcca <- fit.mgcca$astar[[1]]
|
||||||
|
|
||||||
|
# Subspace Distances: Normalized `|| P_A - P_B ||_F` where
|
||||||
|
# `P_A = A (A' A)^-1 A'` and the normalization means that with
|
||||||
|
# respect to the dimensions of `A, B` the subspace distance is in the
|
||||||
|
# range `[0, 1]`.
|
||||||
|
dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
|
||||||
|
dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE)
|
||||||
|
dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
|
||||||
|
dist.subspace.tsir <- dist.subspace(B.true, B.tsir, normalize = TRUE)
|
||||||
|
dist.subspace.mgcca <- dist.subspace(B.true, B.mgcca, normalize = TRUE)
|
||||||
|
|
||||||
|
# No projection distacne as in this case the Subspace and Projection
|
||||||
|
# distances are identical
|
||||||
|
|
||||||
|
# Call CSV logger writing results to file
|
||||||
|
logger()
|
||||||
|
|
||||||
|
# print progress
|
||||||
|
cat(sprintf("sample size (%d): %d/%d - rep: %d/%d\n",
|
||||||
|
sample.size, which(sample.size == sample.sizes),
|
||||||
|
length(sample.sizes), rep, reps))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
### read simulation results generate plots
|
||||||
|
if (!interactive()) { pdf(file = paste(base.name, "pdf", sep = ".")) }
|
||||||
|
|
||||||
|
sim <- read.csv(log.file)
|
||||||
|
|
||||||
|
plot.sim(sim, "dist.subspace", main = "Subspace Distance",
|
||||||
|
xlab = "Sample Size", ylab = "Distance")
|
||||||
|
|
||||||
|
plot.sim(sim, "time", main = "Runtime",
|
||||||
|
xlab = "Sample Size", ylab = "Time [s]", ylim = c(0, 18))
|
||||||
|
|
||||||
|
# aggr <- aggregate(sim[, names(sim) != "sample.size"], list(sample.size = sim$sample.size), mean)
|
|
@ -0,0 +1,157 @@
|
||||||
|
library(tensorPredictors)
|
||||||
|
# library(RGCCA)
|
||||||
|
### Load modified version which _does not_ create a clusster in case of
|
||||||
|
### `n_cores == 1` allowing huge speed improvements! (at least on Ubuntu 22.04 LTS)
|
||||||
|
### Moreover, it is compatible with `Rscript`
|
||||||
|
### Also added `Encoding: UTF-8` in `DESCRIPTION`
|
||||||
|
devtools::load_all("~/Work/tensorPredictors/References/Software/TGCCA-modified", export_all = FALSE)
|
||||||
|
|
||||||
|
|
||||||
|
setwd("~/Work/tensorPredictors/sim/")
|
||||||
|
base.name <- format(Sys.time(), "sim_1d_normal-%Y%m%dT%H%M")
|
||||||
|
|
||||||
|
# Source utility function used in most simulations (extracted for convenience)
|
||||||
|
source("./sim_utils.R")
|
||||||
|
|
||||||
|
# Set PRNG seed for reproducability
|
||||||
|
# Note: `0x` is the HEX number prefix and the trailing `L` stands for "long"
|
||||||
|
# which is `R`s way if indicating an integer.
|
||||||
|
set.seed(0x1dL, "Mersenne-Twister", "Inversion", "Rejection")
|
||||||
|
|
||||||
|
### Simulation configuration
|
||||||
|
reps <- 100 # number of simulation replications
|
||||||
|
sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n`
|
||||||
|
dimX <- c(2, 3, 5) # predictor `X` dimension
|
||||||
|
dimF <- rep(2, length(dimX)) # "function" `F(y)` of responce `y` dimension
|
||||||
|
|
||||||
|
# setup true model parameters (all rank 1 betas)
|
||||||
|
betas <- Map(diag, 1, dimX, dimF)
|
||||||
|
Omegas <- Map(function(pj) 0.5^abs(outer(1:pj, 1:pj, `-`)), dimX) # AR(0.5)
|
||||||
|
eta1 <- 0
|
||||||
|
|
||||||
|
# define projections onto tri-diagonal matrixes
|
||||||
|
proj.Omegas <- Map(.projBand, Map(dim, Omegas), 1L, 1L)
|
||||||
|
# and project Omegas
|
||||||
|
Omegas <- Map(do.call, proj.Omegas, Map(list, Omegas))
|
||||||
|
|
||||||
|
# data sampling routine
|
||||||
|
sample.data <- function(sample.size, eta1, betas, Omegas) {
|
||||||
|
# responce is a standard normal variable
|
||||||
|
y <- rnorm(sample.size)
|
||||||
|
# F(y) is a tensor of monomials
|
||||||
|
F <- sapply(y, function(yi) Reduce(outer, Map(`^`, yi, Map(seq, 0, len = dimF))))
|
||||||
|
dim(F) <- c(dimF, sample.size)
|
||||||
|
|
||||||
|
# sample predictors from tensor normal X | Y = y (last axis is sample axis)
|
||||||
|
sample.axis <- length(betas) + 1L
|
||||||
|
Deltas <- Map(solve, Omegas) # normal covariances
|
||||||
|
mu_y <- mlm(mlm(F, betas) + as.vector(eta1), Deltas) # conditional mean
|
||||||
|
X <- mu_y + rtensornorm(sample.size, 0, Deltas, sample.axis) # responses X
|
||||||
|
|
||||||
|
list(X = X, F = F, y = y, sample.axis = sample.axis)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Create a CSV logger to write simulation results to
|
||||||
|
log.file <- paste(base.name, "csv", sep = ".")
|
||||||
|
logger <- CSV.logger(
|
||||||
|
file.name = log.file,
|
||||||
|
header = c("sample.size", "rep", outer(
|
||||||
|
c("time", "dist.subspace", "dist.projection"), # measures
|
||||||
|
c("gmlm", "pca", "hopca", "tsir", "mgcca"), # methods
|
||||||
|
paste, sep = "."
|
||||||
|
))
|
||||||
|
)
|
||||||
|
|
||||||
|
# compute true (full) model parameters to compair estimates against
|
||||||
|
B.true <- Reduce(`%x%`, rev(betas))
|
||||||
|
|
||||||
|
### for each sample size
|
||||||
|
for (sample.size in sample.sizes) {
|
||||||
|
# repeate every simulation
|
||||||
|
for (rep in seq_len(reps)) {
|
||||||
|
# Sample training data
|
||||||
|
c(X, F, y, sample.axis) %<-% sample.data(sample.size, eta1, betas, Omegas)
|
||||||
|
|
||||||
|
# fit different models
|
||||||
|
# Wrapped in try-catch clock to ensure the simulation continues,
|
||||||
|
# if an error occures continue with nest resplication and log an error message
|
||||||
|
try.catch.block <- tryCatch({
|
||||||
|
time.gmlm <- system.time(
|
||||||
|
fit.gmlm <- gmlm_tensor_normal(X, F, sample.axis = sample.axis,
|
||||||
|
proj.Omegas = proj.Omegas)
|
||||||
|
)["user.self"]
|
||||||
|
time.pca <- system.time(
|
||||||
|
fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(dimF))
|
||||||
|
)["user.self"]
|
||||||
|
time.hopca <- system.time(
|
||||||
|
fit.hopca <- HOPCA(X, npc = dimF, sample.axis = sample.axis)
|
||||||
|
)["user.self"]
|
||||||
|
time.tsir <- system.time(
|
||||||
|
fit.tsir <- TSIR(X, y, dimF, sample.axis = sample.axis)
|
||||||
|
)["user.self"]
|
||||||
|
# `mgcca` expects the first axis to be the sample axis
|
||||||
|
X1 <- aperm(X, c(sample.axis, seq_along(dim(X))[-sample.axis]))
|
||||||
|
F1 <- cbind(y, y^2, y^3)
|
||||||
|
time.mgcca <- system.time(
|
||||||
|
fit.mgcca <- mgcca(
|
||||||
|
list(X1, F1),
|
||||||
|
quiet = TRUE,
|
||||||
|
scheme = "factorial",
|
||||||
|
ncomp = c(prod(dimF), 1L)
|
||||||
|
)
|
||||||
|
)["user.self"]
|
||||||
|
}, error = print)
|
||||||
|
|
||||||
|
# Drop comparison in case any error (in any fitting routine)
|
||||||
|
if (inherits(try.catch.block, "error")) { next }
|
||||||
|
|
||||||
|
# Compute true reduction matrix
|
||||||
|
B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(betas)))
|
||||||
|
B.pca <- fit.pca$rotation
|
||||||
|
B.hopca <- Reduce(`%x%`, rev(fit.hopca))
|
||||||
|
B.tsir <- Reduce(`%x%`, rev(fit.tsir))
|
||||||
|
B.mgcca <- fit.mgcca$astar[[1]]
|
||||||
|
|
||||||
|
# Subspace Distances: Normalized `|| P_A - P_B ||_F` where
|
||||||
|
# `P_A = A (A' A)^-1 A'` and the normalization means that with
|
||||||
|
# respect to the dimensions of `A, B` the subspace distance is in the
|
||||||
|
# range `[0, 1]`.
|
||||||
|
dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
|
||||||
|
dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE)
|
||||||
|
dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
|
||||||
|
dist.subspace.tsir <- dist.subspace(B.true, B.tsir, normalize = TRUE)
|
||||||
|
dist.subspace.mgcca <- dist.subspace(B.true, B.mgcca, normalize = TRUE)
|
||||||
|
|
||||||
|
# Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`.
|
||||||
|
dist.projection.gmlm <- dist.projection(B.true, B.gmlm)
|
||||||
|
dist.projection.pca <- dist.projection(B.true, B.pca)
|
||||||
|
dist.projection.hopca <- dist.projection(B.true, B.hopca)
|
||||||
|
dist.projection.tsir <- dist.projection(B.true, B.tsir)
|
||||||
|
dist.projection.mgcca <- dist.projection(B.true, B.mgcca)
|
||||||
|
|
||||||
|
# Call CSV logger writing results to file
|
||||||
|
logger()
|
||||||
|
|
||||||
|
# print progress
|
||||||
|
cat(sprintf("sample size (%d): %d/%d - rep: %d/%d\n",
|
||||||
|
sample.size, which(sample.size == sample.sizes),
|
||||||
|
length(sample.sizes), rep, reps))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
### read simulation results generate plots
|
||||||
|
if (!interactive()) { pdf(file = paste(base.name, "pdf", sep = ".")) }
|
||||||
|
|
||||||
|
sim <- read.csv(log.file)
|
||||||
|
|
||||||
|
plot.sim(sim, "dist.subspace", main = "Subspace Distance",
|
||||||
|
xlab = "Sample Size", ylab = "Distance")
|
||||||
|
|
||||||
|
plot.sim(sim, "dist.projection", main = "Projection Distance",
|
||||||
|
xlab = "Sample Size", ylab = "Distance")
|
||||||
|
|
||||||
|
plot.sim(sim, "time", main = "Runtime",
|
||||||
|
xlab = "Sample Size", ylab = "Time [s]", ylim = c(0, 18))
|
|
@ -0,0 +1,231 @@
|
||||||
|
library(tensorPredictors)
|
||||||
|
# library(RGCCA) # for `mgcca`
|
||||||
|
### Load modified version which _does not_ create a clusster in case of
|
||||||
|
### `n_cores == 1` allowing huge speed improvements! (at least on Ubuntu 22.04 LTS)
|
||||||
|
### Moreover, it is compatible with `Rscript`
|
||||||
|
devtools::load_all("~/Work/tensorPredictors/References/Software/TGCCA-modified", export_all = FALSE)
|
||||||
|
|
||||||
|
|
||||||
|
setwd("~/Work/tensorPredictors/sim/")
|
||||||
|
base.name <- format(Sys.time(), "sim_1e_normal-%Y%m%dT%H%M")
|
||||||
|
|
||||||
|
# Source utility function used in most simulations (extracted for convenience)
|
||||||
|
source("./sim_utils.R")
|
||||||
|
|
||||||
|
# Set PRNG seed for reproducability
|
||||||
|
# Note: `0x` is the HEX number prefix and the trailing `L` stands for "long"
|
||||||
|
# which is `R`s way if indicating an integer.
|
||||||
|
set.seed(0x1eL, "Mersenne-Twister", "Inversion", "Rejection")
|
||||||
|
|
||||||
|
### Simulation configuration
|
||||||
|
reps <- 100 # number of simulation replications
|
||||||
|
sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n`
|
||||||
|
dimX <- c(5, 5) # predictor `X` dimension
|
||||||
|
dimF <- c(2, 2)
|
||||||
|
|
||||||
|
|
||||||
|
Sigma <- 0.5^abs(outer(1:prod(dimX), 1:prod(dimX), `-`)) # AR(0.5)
|
||||||
|
|
||||||
|
# # define projections onto tri-diagonal matrixes
|
||||||
|
# proj.betas <- Map(.projRank, rep(1L, length(dimX))) # wrong assumption of low rank betas
|
||||||
|
# proj.Omegas <- Map(.projBand, Map(c, dimX, dimX), 1L, 1L) # wrong assumption of band Scatter matrices
|
||||||
|
|
||||||
|
# data sampling routine
|
||||||
|
sample.data <- function(sample.size, dimX, dimF, B.true, Sigma) {
|
||||||
|
y <- rnorm(sample.size)
|
||||||
|
|
||||||
|
# the true F (in vectorized form)
|
||||||
|
vecF <- rbind(1, sin(y), cos(y), sin(y) * cos(y))
|
||||||
|
|
||||||
|
# sample vectorized X as a multi-variate normal (in vectorized form)
|
||||||
|
vecX <- B.true %*% vecF + t(chol(Sigma)) %*% matrix(rnorm(prod(sample.size, dimX)), prod(dimX))
|
||||||
|
X <- array(vecX, c(dimX, sample.size))
|
||||||
|
|
||||||
|
list(X, vecF, y, length(dim(X)))
|
||||||
|
}
|
||||||
|
|
||||||
|
# wrong assumption about the function `F(y)`
|
||||||
|
F.wrong <- function(y) array(rbind(1, y, y, y^2), c(2, 2, length(y)))
|
||||||
|
|
||||||
|
|
||||||
|
# Create a CSV logger to write simulation results to
|
||||||
|
log.file <- paste(base.name, "csv", sep = ".")
|
||||||
|
logger <- CSV.logger(
|
||||||
|
file.name = log.file,
|
||||||
|
header = c("sample.size", "rep", outer(
|
||||||
|
c("time", "dist.subspace", "dist.projection"), # measures
|
||||||
|
c("gmlm", "pca", "hopca", "tsir", "mgcca"), # methods
|
||||||
|
paste, sep = "."
|
||||||
|
))
|
||||||
|
)
|
||||||
|
|
||||||
|
# Miss-specify true beta as _not_ a kronecker product
|
||||||
|
B.true <- diag(1, prod(dimX), prod(dimF))
|
||||||
|
|
||||||
|
### for each sample size
|
||||||
|
for (sample.size in sample.sizes) {
|
||||||
|
# repeate every simulation
|
||||||
|
for (rep in seq_len(reps)) {
|
||||||
|
# Sample training data
|
||||||
|
c(X, F.true, y, sample.axis) %<-% sample.data(sample.size, dimX, dimF, B.true, Sigma)
|
||||||
|
|
||||||
|
# fit different models
|
||||||
|
# Wrapped in try-catch clock to ensure the simulation continues,
|
||||||
|
# if an error occures continue with nest resplication and log an error message
|
||||||
|
tryCatch({
|
||||||
|
time.gmlm <- system.time(
|
||||||
|
fit.gmlm <- gmlm_tensor_normal(X, F.wrong(y), sample.axis = sample.axis)
|
||||||
|
)["user.self"]
|
||||||
|
time.pca <- system.time(
|
||||||
|
fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(dimF))
|
||||||
|
)["user.self"]
|
||||||
|
time.hopca <- system.time(
|
||||||
|
fit.hopca <- HOPCA(X, npc = dimF, sample.axis = sample.axis)
|
||||||
|
)["user.self"]
|
||||||
|
time.tsir <- system.time(
|
||||||
|
fit.tsir <- TSIR(X, y, dimF, sample.axis = sample.axis)
|
||||||
|
)["user.self"]
|
||||||
|
# `mgcca` expects the first axis to be the sample axis
|
||||||
|
X1 <- aperm(X, c(sample.axis, seq_along(dim(X))[-sample.axis]))
|
||||||
|
time.mgcca <- system.time(
|
||||||
|
fit.mgcca <- mgcca(
|
||||||
|
list(X1, y),
|
||||||
|
quiet = TRUE,
|
||||||
|
scheme = "factorial",
|
||||||
|
ncomp = c(prod(dimF), 1),
|
||||||
|
ranks = c(prod(dimF), 1)
|
||||||
|
)
|
||||||
|
)["user.self"]
|
||||||
|
}, error = function(ex) {
|
||||||
|
print(ex)
|
||||||
|
})
|
||||||
|
|
||||||
|
# Compute true reduction matrix
|
||||||
|
B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(betas)))
|
||||||
|
B.pca <- fit.pca$rotation
|
||||||
|
B.hopca <- Reduce(`%x%`, rev(fit.hopca))
|
||||||
|
B.tsir <- Reduce(`%x%`, rev(fit.tsir))
|
||||||
|
B.mgcca <- fit.mgcca$astar[[1]]
|
||||||
|
|
||||||
|
# Subspace Distances: Normalized `|| P_A - P_B ||_F` where
|
||||||
|
# `P_A = A (A' A)^-1 A'` and the normalization means that with
|
||||||
|
# respect to the dimensions of `A, B` the subspace distance is in the
|
||||||
|
# range `[0, 1]`.
|
||||||
|
dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
|
||||||
|
dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE)
|
||||||
|
dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
|
||||||
|
dist.subspace.tsir <- dist.subspace(B.true, B.tsir, normalize = TRUE)
|
||||||
|
dist.subspace.mgcca <- dist.subspace(B.true, B.mgcca, normalize = TRUE)
|
||||||
|
|
||||||
|
# Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`.
|
||||||
|
dist.projection.gmlm <- dist.projection(B.true, B.gmlm)
|
||||||
|
dist.projection.pca <- dist.projection(B.true, B.pca)
|
||||||
|
dist.projection.hopca <- dist.projection(B.true, B.hopca)
|
||||||
|
dist.projection.tsir <- dist.projection(B.true, B.tsir)
|
||||||
|
dist.projection.mgcca <- dist.projection(B.true, B.mgcca)
|
||||||
|
|
||||||
|
# Call CSV logger writing results to file
|
||||||
|
logger()
|
||||||
|
|
||||||
|
# print progress
|
||||||
|
cat(sprintf("sample size (%d): %d/%d - rep: %d/%d\n",
|
||||||
|
sample.size, which(sample.size == sample.sizes),
|
||||||
|
length(sample.sizes), rep, reps))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
### read simulation results generate plots
|
||||||
|
if (!interactive()) { pdf(file = paste(base.name, "pdf", sep = ".")) }
|
||||||
|
|
||||||
|
sim <- read.csv(log.file)
|
||||||
|
|
||||||
|
# Remain sample size grouping variable to avoid conflicts
|
||||||
|
aggr.mean <- aggregate(sim, list(sampleSize = sim$sample.size), mean)
|
||||||
|
aggr.median <- aggregate(sim, list(sampleSize = sim$sample.size), median)
|
||||||
|
aggr.sd <- aggregate(sim, list(sampleSize = sim$sample.size), sd)
|
||||||
|
aggr.min <- aggregate(sim, list(sampleSize = sim$sample.size), min)
|
||||||
|
aggr.max <- aggregate(sim, list(sampleSize = sim$sample.size), max)
|
||||||
|
|
||||||
|
par(pch = 16, bty = "n", lty = "solid", lwd = 1.5)
|
||||||
|
cols <- c(gmlm = "blue", pca = "darkcyan", hopca = "red", tsir = "darkgreen",
|
||||||
|
mgcca = "purple")
|
||||||
|
|
||||||
|
with(aggr.mean, {
|
||||||
|
plot(range(sampleSize), c(0, 1), type = "n",
|
||||||
|
main = "Subspace Distance",
|
||||||
|
xlab = "Sample Size",
|
||||||
|
ylab = "Distance"
|
||||||
|
)
|
||||||
|
for (dist.name in ls(pattern = "^dist.subspace")) {
|
||||||
|
mean <- get(dist.name)
|
||||||
|
median <- aggr.median[aggr.sd$sampleSize == sampleSize, dist.name]
|
||||||
|
sd <- aggr.sd[aggr.sd$sampleSize == sampleSize, dist.name]
|
||||||
|
min <- aggr.min[aggr.sd$sampleSize == sampleSize, dist.name]
|
||||||
|
max <- aggr.max[aggr.sd$sampleSize == sampleSize, dist.name]
|
||||||
|
method <- tail(strsplit(dist.name, ".", fixed = TRUE)[[1]], 1)
|
||||||
|
col <- cols[method]
|
||||||
|
lines(sampleSize, mean, type = "o", col = col, lty = 1, lwd = 1)
|
||||||
|
lines(sampleSize, mean + sd, col = col, lty = 2, lwd = 0.8)
|
||||||
|
lines(sampleSize, mean - sd, col = col, lty = 2, lwd = 0.8)
|
||||||
|
lines(sampleSize, median, col = col, lty = 1, lwd = 1)
|
||||||
|
lines(sampleSize, min, col = col, lty = 3, lwd = 0.6)
|
||||||
|
lines(sampleSize, max, col = col, lty = 3, lwd = 0.6)
|
||||||
|
}
|
||||||
|
|
||||||
|
legend("topright", col = cols, lty = 1, legend = names(cols),
|
||||||
|
bty = "n", lwd = par("lwd"), pch = par("pch"))
|
||||||
|
})
|
||||||
|
|
||||||
|
with(aggr.mean, {
|
||||||
|
plot(range(sampleSize), c(0, 1), type = "n",
|
||||||
|
main = "Projection Distance",
|
||||||
|
xlab = "Sample Size",
|
||||||
|
ylab = "Distance"
|
||||||
|
)
|
||||||
|
for (dist.name in ls(pattern = "^dist.projection")) {
|
||||||
|
mean <- get(dist.name)
|
||||||
|
median <- aggr.median[aggr.sd$sampleSize == sampleSize, dist.name]
|
||||||
|
sd <- aggr.sd[aggr.sd$sampleSize == sampleSize, dist.name]
|
||||||
|
min <- aggr.min[aggr.sd$sampleSize == sampleSize, dist.name]
|
||||||
|
max <- aggr.max[aggr.sd$sampleSize == sampleSize, dist.name]
|
||||||
|
method <- tail(strsplit(dist.name, ".", fixed = TRUE)[[1]], 1)
|
||||||
|
col <- cols[method]
|
||||||
|
lines(sampleSize, mean, type = "o", col = col, lty = 1, lwd = 1)
|
||||||
|
lines(sampleSize, mean + sd, col = col, lty = 2, lwd = 0.8)
|
||||||
|
lines(sampleSize, mean - sd, col = col, lty = 2, lwd = 0.8)
|
||||||
|
lines(sampleSize, median, col = col, lty = 1, lwd = 1)
|
||||||
|
lines(sampleSize, min, col = col, lty = 3, lwd = 0.6)
|
||||||
|
lines(sampleSize, max, col = col, lty = 3, lwd = 0.6)
|
||||||
|
}
|
||||||
|
|
||||||
|
legend("topright", col = cols, lty = 1, legend = names(cols),
|
||||||
|
bty = "n", lwd = par("lwd"), pch = par("pch"))
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# sample.axis <- 1L
|
||||||
|
# F.wrong <- array(outer(y, c(0, 1, 1, 2, 1, 2, 2, 3), `^`), c(10, 2, 2, 2))
|
||||||
|
|
||||||
|
|
||||||
|
# F.true <- array(c(1, sin(y), cos(y)))
|
||||||
|
|
||||||
|
|
||||||
|
# 1 s c
|
||||||
|
# s ss cs
|
||||||
|
# c sc cc
|
||||||
|
|
||||||
|
# s ss cs
|
||||||
|
# ss sss css
|
||||||
|
# cs scs ccs
|
||||||
|
|
||||||
|
# c sc cc
|
||||||
|
# sc ssc csc
|
||||||
|
# cc scc ccc
|
||||||
|
|
||||||
|
# osc <-
|
||||||
|
|
||||||
|
# I <- array(0, c(3, 3, 3))
|
||||||
|
|
||||||
|
# slice.index()
|
|
@ -0,0 +1,315 @@
|
||||||
|
# library(tensorPredictors)
|
||||||
|
devtools::load_all("~/Work/tensorPredictors/tensorPredictors", export_all = FALSE)
|
||||||
|
|
||||||
|
library(logisticPCA)
|
||||||
|
# library(RGCCA)
|
||||||
|
# Use modified version of `RGCCA`
|
||||||
|
# Reasons (on Ubuntu 22.04 LTS):
|
||||||
|
# - compatible with `Rscript`
|
||||||
|
# - about 4 times faster for small problems
|
||||||
|
# Changes:
|
||||||
|
# - Run in main thread, avoid `parallel::makeCluster` if `n_cores == 1`
|
||||||
|
# (file "R/mgccak.R" lines 81:103)
|
||||||
|
# - added `Encoding: UTF-8`
|
||||||
|
# (file "DESCRIPTION")
|
||||||
|
suppressWarnings({
|
||||||
|
devtools::load_all("~/Work/tensorPredictors/References/Software/TGCCA-modified", export_all = FALSE)
|
||||||
|
})
|
||||||
|
|
||||||
|
setwd("~/Work/tensorPredictors/sim/")
|
||||||
|
base.name <- format(Sys.time(), "sim_2a_ising-%Y%m%dT%H%M")
|
||||||
|
|
||||||
|
# Source utility function used in most simulations (extracted for convenience)
|
||||||
|
source("./sim_utils.R")
|
||||||
|
|
||||||
|
# Set PRNG seed for reproducability
|
||||||
|
# Note: `0x` is the HEX number prefix and the trailing `L` stands for "long"
|
||||||
|
# which is `R`s way if indicating an integer.
|
||||||
|
set.seed(0x2aL, "Mersenne-Twister", "Inversion", "Rejection")
|
||||||
|
|
||||||
|
|
||||||
|
reps <- 100 # number of simulation replications
|
||||||
|
sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n`
|
||||||
|
dimX <- c(2, 3) # predictor `X` dimension
|
||||||
|
dimF <- rep(1, length(dimX)) # "function" `F(y)` of responce `y` dimension
|
||||||
|
|
||||||
|
betas <- Map(diag, 1, dimX, dimF)
|
||||||
|
Omegas <- list(toeplitz(c(0, -2)), toeplitz(seq(1, 0, by = -0.5)))
|
||||||
|
|
||||||
|
|
||||||
|
# data sampling routine
|
||||||
|
sample.data <- function(sample.size, betas, Omegas) {
|
||||||
|
dimX <- mapply(nrow, betas)
|
||||||
|
dimF <- mapply(ncol, betas)
|
||||||
|
|
||||||
|
# generate response (sample axis is last axis)
|
||||||
|
y <- runif(prod(sample.size, dimF), -2, 2)
|
||||||
|
F <- array(y, dim = c(dimF, sample.size)) # ~ U[-1, 1]
|
||||||
|
|
||||||
|
Omega <- Reduce(kronecker, rev(Omegas))
|
||||||
|
|
||||||
|
X <- apply(F, length(dim(F)), function(Fi) {
|
||||||
|
dim(Fi) <- dimF
|
||||||
|
params <- diag(as.vector(mlm(Fi, betas))) + Omega
|
||||||
|
tensorPredictors::ising_sample(1, params)
|
||||||
|
})
|
||||||
|
dim(X) <- c(dimX, sample.size)
|
||||||
|
|
||||||
|
list(X = X, F = F, y = y, sample.axis = length(dim(X)))
|
||||||
|
}
|
||||||
|
|
||||||
|
lpca.hyper.param <- local({
|
||||||
|
c(X, F, y, sample.axis) %<-% sample.data(1e3, betas, Omegas)
|
||||||
|
vecX <- mat(X, sample.axis)
|
||||||
|
CV <- cv.lpca(vecX, ks = prod(dimF), ms = seq(1, 20, by = 0.5))
|
||||||
|
# plot(CV)
|
||||||
|
as.numeric(colnames(CV))[which.min(CV)]
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
# Create a CSV logger to write simulation results to
|
||||||
|
log.file <- paste(base.name, "csv", sep = ".")
|
||||||
|
logger <- CSV.logger(
|
||||||
|
file.name = log.file,
|
||||||
|
header = c("sample.size", "rep", outer(
|
||||||
|
c("time", "dist.subspace"), # measures
|
||||||
|
c("gmlm", "tnormal", "pca", "hopca", "lpca", "clpca", "tsir", "mgcca"), # methods
|
||||||
|
paste, sep = "."
|
||||||
|
))
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# compute true (full) model parameters to compair estimates against
|
||||||
|
B.true <- Reduce(`%x%`, rev(betas))
|
||||||
|
|
||||||
|
### for each sample size
|
||||||
|
for (sample.size in sample.sizes) {
|
||||||
|
# repeate every simulation
|
||||||
|
for (rep in seq_len(reps)) {
|
||||||
|
# Sample training data
|
||||||
|
c(X, F, y, sample.axis) %<-% sample.data(sample.size, betas, Omegas)
|
||||||
|
|
||||||
|
# fit different models
|
||||||
|
# Wrapped in try-catch clock to ensure the simulation continues,
|
||||||
|
# if an error occures continue with nest resplication and log an error message
|
||||||
|
try.catch.block <- tryCatch({
|
||||||
|
time.gmlm <- system.time(
|
||||||
|
fit.gmlm <- gmlm_ising(X, F, sample.axis = sample.axis)
|
||||||
|
)["user.self"]
|
||||||
|
time.tnormal <- -1 # part of Ising gmlm (not relevent here)
|
||||||
|
time.pca <- system.time(
|
||||||
|
fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(dimF))
|
||||||
|
)["user.self"]
|
||||||
|
time.hopca <- system.time(
|
||||||
|
fit.hopca <- HOPCA(X, npc = dimF, sample.axis = sample.axis)
|
||||||
|
)["user.self"]
|
||||||
|
time.lpca <- system.time(
|
||||||
|
fit.lpca <- logisticPCA(mat(X, sample.axis), k = prod(dimF), m = lpca.hyper.param)
|
||||||
|
)["user.self"]
|
||||||
|
time.clpca <- system.time(
|
||||||
|
fit.clpca <- convexLogisticPCA(mat(X, sample.axis), k = prod(dimF), m = lpca.hyper.param)
|
||||||
|
)["user.self"]
|
||||||
|
time.tsir <- system.time(
|
||||||
|
fit.tsir <- TSIR(X, y, dimF, sample.axis = sample.axis)
|
||||||
|
)["user.self"]
|
||||||
|
# `mgcca` expects the first axis to be the sample axis
|
||||||
|
X1 <- aperm(X, c(sample.axis, seq_along(dim(X))[-sample.axis]))
|
||||||
|
time.mgcca <- system.time(
|
||||||
|
fit.mgcca <- mgcca(
|
||||||
|
list(X1, y), # `drop` removes 1D axis
|
||||||
|
quiet = TRUE,
|
||||||
|
scheme = "factorial",
|
||||||
|
ncomp = c(1, 1)
|
||||||
|
)
|
||||||
|
)["user.self"]
|
||||||
|
}, error = print)
|
||||||
|
|
||||||
|
# Drop comparison in case any error (in any fitting routine)
|
||||||
|
if (inherits(try.catch.block, "error")) { next }
|
||||||
|
|
||||||
|
# Compute true reduction matrix
|
||||||
|
B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(betas)))
|
||||||
|
B.tnormal <- with(attr(fit.gmlm, "tensor_normal"), Reduce(`%x%`, rev(betas)))
|
||||||
|
B.pca <- fit.pca$rotation
|
||||||
|
B.hopca <- Reduce(`%x%`, rev(fit.hopca))
|
||||||
|
B.lpca <- fit.lpca$U
|
||||||
|
B.clpca <- fit.clpca$U
|
||||||
|
B.tsir <- Reduce(`%x%`, rev(fit.tsir))
|
||||||
|
B.mgcca <- fit.mgcca$astar[[1]]
|
||||||
|
|
||||||
|
# Subspace Distances: Normalized `|| P_A - P_B ||_F` where
|
||||||
|
# `P_A = A (A' A)^-1 A'` and the normalization means that with
|
||||||
|
# respect to the dimensions of `A, B` the subspace distance is in the
|
||||||
|
# range `[0, 1]`.
|
||||||
|
dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
|
||||||
|
dist.subspace.tnormal <- dist.subspace(B.true, B.tnormal, normalize = TRUE)
|
||||||
|
dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE)
|
||||||
|
dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
|
||||||
|
dist.subspace.lpca <- dist.subspace(B.true, B.lpca, normalize = TRUE)
|
||||||
|
dist.subspace.clpca <- dist.subspace(B.true, B.clpca, normalize = TRUE)
|
||||||
|
dist.subspace.tsir <- dist.subspace(B.true, B.tsir, normalize = TRUE)
|
||||||
|
dist.subspace.mgcca <- dist.subspace(B.true, B.mgcca, normalize = TRUE)
|
||||||
|
|
||||||
|
# # Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`.
|
||||||
|
# equiv to Subspace distance in this case
|
||||||
|
|
||||||
|
# Call CSV logger writing results to file
|
||||||
|
logger()
|
||||||
|
|
||||||
|
# print progress
|
||||||
|
cat(sprintf("sample size (%d): %d/%d - rep: %d/%d\n",
|
||||||
|
sample.size, which(sample.size == sample.sizes),
|
||||||
|
length(sample.sizes), rep, reps))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
### read simulation results generate plots
|
||||||
|
if (!interactive()) { pdf(file = paste(base.name, "pdf", sep = ".")) }
|
||||||
|
|
||||||
|
sim <- read.csv(log.file)
|
||||||
|
|
||||||
|
|
||||||
|
plot.sim(sim, "dist.subspace", main = "Subspace Distance",
|
||||||
|
xlab = "Sample Size", ylab = "Distance")
|
||||||
|
|
||||||
|
# plot.sim(sim, "dist.projection", main = "Projection Distance",
|
||||||
|
# xlab = "Sample Size", ylab = "Distance")
|
||||||
|
|
||||||
|
plot.sim(sim, "time", main = "Runtime",
|
||||||
|
xlab = "Sample Size", ylab = "Time [s]", ylim = c(0, 18))
|
||||||
|
|
||||||
|
# aggr <- aggregate(sim, list(sim$sample.size), sd)
|
||||||
|
# stats <- aggr[, c(2, 5, 7, 9, 11, 13, 15, 17, 19)]
|
||||||
|
|
||||||
|
# names(stats) <- Map(tail, strsplit(names(stats), ".", fixed = TRUE), 1)
|
||||||
|
# round(stats * 100, 2)
|
||||||
|
|
||||||
|
# sim <- sim[!startsWith(names(sim), "time")]
|
||||||
|
# sim <- sim[names(sim) != "rep"]
|
||||||
|
# names(sim) <- strsplit(names(stats), ".", fixed = TRUE)
|
||||||
|
# (as.data.frame(Map(function(m, s) {
|
||||||
|
# paste0(round(m, 2), " (", round(s, 2), ")")
|
||||||
|
# },
|
||||||
|
# aggregate(sim, list(sim$size), mean),
|
||||||
|
# aggregate(sim, list(sim$size), sd)
|
||||||
|
# )))
|
||||||
|
|
||||||
|
# $n$ & gmlm & pca & hopca lpca & clpca & tsir & mgcca
|
||||||
|
# 100 & 0.34 (0.14) & 0.90 (0.04) & 0.90 (0.05) 0.94 (0.09) & 1 0.91 (0.03) & 0.48 (0.19) & 0.55 (0.13)
|
||||||
|
# 200 & 0.25 (0.11) & 0.90 (0.03) & 0.90 (0.03) 0.96 (0.07) & 2 0.91 (0.02) & 0.38 (0.16) & 0.53 (0.10)
|
||||||
|
# 300 & 0.20 (0.09) & 0.89 (0.02) & 0.89 (0.02) 0.97 (0.06) & 3 0.91 (0.02) & 0.29 (0.13) & 0.51 (0.11)
|
||||||
|
# 500 & 0.16 (0.07) & 0.90 (0.02) & 0.90 (0.02) 0.98 (0.01) & 4 0.91 (0.01) & 0.23 (0.10) & 0.50 (0.08)
|
||||||
|
# 750 & 0.13 (0.05) & 0.90 (0.01) & 0.90 (0.01) 0.98 (0.02) & 5 0.91 (0.01) & 0.23 (0.08) & 0.53 (0.06)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if (FALSE) {
|
||||||
|
################################################################################
|
||||||
|
### Work In Progress ###
|
||||||
|
################################################################################
|
||||||
|
library(tensorPredictors)
|
||||||
|
|
||||||
|
dimX <- c(3, 3, 3)
|
||||||
|
dimF <- c(1, 1, 1)
|
||||||
|
|
||||||
|
betas <- Map(diag, 1, dimX, dimF)
|
||||||
|
Omegas <- rev(list(
|
||||||
|
toeplitz(-1 * (seq_len(dimX[1]) == 2L)),
|
||||||
|
toeplitz(seq(1, 0, len = dimX[2])),
|
||||||
|
diag(dimX[3])
|
||||||
|
))
|
||||||
|
Omega <- Reduce(kronecker, rev(Omegas))
|
||||||
|
|
||||||
|
layout(matrix(c(
|
||||||
|
1, 3, 4,
|
||||||
|
2, 3, 5,
|
||||||
|
6, 6, 6
|
||||||
|
), nrow = 3, byrow = TRUE), heights = c(8, 8, 1))
|
||||||
|
|
||||||
|
|
||||||
|
`E(X |` <- function(Y) {
|
||||||
|
array(diag(ising_m2(diag(as.vector(mlm(array(Y, dimF), betas))) + Omega)), dimX)
|
||||||
|
}
|
||||||
|
`E(X |`(Y = -2)
|
||||||
|
`E(X |`(Y = +2)
|
||||||
|
|
||||||
|
col <- hcl.colors(256, "Blue-Red 3", rev = FALSE)
|
||||||
|
matrixImage(`E(X |`(Y = -2), main = "E[X | Y = -2]", zlim = c(0, 1), col = col)
|
||||||
|
matrixImage(`E(X |`(Y = -1), main = "E[X | Y = -1]", zlim = c(0, 1), col = col)
|
||||||
|
matrixImage(`E(X |`(Y = 0), main = "E[X | Y = 0]", zlim = c(0, 1), col = col)
|
||||||
|
matrixImage(`E(X |`(Y = +1), main = "E[X | Y = +1]", zlim = c(0, 1), col = col)
|
||||||
|
matrixImage(`E(X |`(Y = +2), main = "E[X | Y = +2]", zlim = c(0, 1), col = col)
|
||||||
|
{
|
||||||
|
restor.par <- par(mar = c(1.1, 2.1, 0, 2.1))
|
||||||
|
plot(0:1, 0:1, type = "n", xlab = "", ylab = "", axes = FALSE)
|
||||||
|
rasterImage(as.raster(matrix(col, nrow = 1)), 0, 0, 1, 1)
|
||||||
|
mtext("0", side = 2, las = 1, line = -3)
|
||||||
|
mtext("1", side = 4, las = 1, line = -3)
|
||||||
|
par(restor.par)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
sample.size <- 100
|
||||||
|
c(X, F, y, sample.axis) %<-% sample.data(sample.size, betas, Omegas)
|
||||||
|
# Design matrix containing vectorized X's
|
||||||
|
vecX <- mat(X, sample.axis)
|
||||||
|
|
||||||
|
|
||||||
|
fit.gmlm <- gmlm_ising(X, F)
|
||||||
|
fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(dimF))
|
||||||
|
fit.hopca <- HOPCA(X, npc = dimF, sample.axis = sample.axis)
|
||||||
|
fit.tsir <- TSIR(X, y, dimF, sample.axis = sample.axis)
|
||||||
|
fit.mgcca <- local({
|
||||||
|
X1 <- aperm(X, c(sample.axis, seq_along(dim(X))[-sample.axis]))
|
||||||
|
F1 <- aperm(F, c(sample.axis, seq_along(dim(X))[-sample.axis]))
|
||||||
|
mgcca(
|
||||||
|
list(X1, drop(F1)), # `drop` removes 1D axis
|
||||||
|
quiet = TRUE,
|
||||||
|
scheme = "factorial",
|
||||||
|
ncomp = rep(prod(dimF), 2)
|
||||||
|
)
|
||||||
|
})
|
||||||
|
fit.lpca <- logisticPCA(vecX, k = prod(dimF), m = m)
|
||||||
|
fit.clpca <- convexLogisticPCA(vecX, k = prod(dimF), m = m)
|
||||||
|
|
||||||
|
|
||||||
|
B.gmlm <- Reduce(kronecker, rev(fit.gmlm$betas))
|
||||||
|
B.pca <- fit.pca$rotation
|
||||||
|
B.hopca <- Reduce(`%x%`, rev(fit.hopca))
|
||||||
|
B.tsir <- Reduce(`%x%`, rev(fit.tsir))
|
||||||
|
B.mgcca <- fit.mgcca$astar[[1]]
|
||||||
|
B.lpca <- fit.lpca$U
|
||||||
|
B.clpca <- fit.clpca$U
|
||||||
|
# B.lsvd <- ???
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
dist.subspace(B.true, B.gmlm, normalize = TRUE)
|
||||||
|
dist.subspace(B.true, B.pca, normalize = TRUE)
|
||||||
|
dist.subspace(B.true, B.hopca, normalize = TRUE)
|
||||||
|
dist.subspace(B.true, B.tsir, normalize = TRUE)
|
||||||
|
dist.subspace(B.true, B.mgcca, normalize = TRUE)
|
||||||
|
dist.subspace(B.true, B.lpca, normalize = TRUE)
|
||||||
|
dist.subspace(B.true, B.clpca, normalize = TRUE)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
################################################################################
|
||||||
|
### End - Work In Progress ###
|
||||||
|
################################################################################
|
||||||
|
}
|
Loading…
Reference in New Issue