diff --git a/sim/sim_2b_ising.R b/sim/sim_2b_ising.R
new file mode 100644
index 0000000..84c34ba
--- /dev/null
+++ b/sim/sim_2b_ising.R
@@ -0,0 +1,197 @@
+library(tensorPredictors)
+library(logisticPCA)
+# library(RGCCA)
+# Use a modified version of `RGCCA` instead.
+# Reasons (on Ubuntu 22.04 LTS):
+# - compatible with `Rscript`
+# - about 4 times faster for small problems
+# Changes:
+# - Run in main thread, avoid `parallel::makeCluster` if `n_cores == 1`
+#   (file "R/mgccak.R", lines 81:103)
+# - added `Encoding: UTF-8`
+#   (file "DESCRIPTION")
+suppressWarnings({
+    devtools::load_all("~/Work/tensorPredictors/References/Software/TGCCA-modified", export_all = FALSE)
+})
+
+setwd("~/Work/tensorPredictors/sim/")
+base.name <- format(Sys.time(), "sim_2b_ising-%Y%m%dT%H%M")
+
+# Source utility functions used in most simulations (extracted for convenience)
+source("./sim_utils.R")
+
+# Set the PRNG seed for reproducibility
+# Note: `0x` is the hex number prefix and the trailing `L` stands for "long",
+# which is `R`'s way of indicating an integer.
+set.seed(seed <- 0x2bL, "Mersenne-Twister", "Inversion", "Rejection")
+
+
+reps <- 100                                 # number of simulation replications
+sample.sizes <- c(100, 200, 300, 500, 750)  # sample sizes `n`
+dimX <- c(2, 3)                             # predictor `X` dimension
+dimF <- c(2, 2)                             # dimension of the "function" `F(y)` of the response `y`
+
+betas <- Map(diag, 1, dimX, dimF)
+Omegas <- list(toeplitz(c(0, -2)), toeplitz(seq(1, 0, by = -0.5)))
+
+# compute the true (full) model parameter to compare estimates against
+B.true <- Reduce(`%x%`, rev(betas))
+
+# data sampling routine
+sample.data <- function(sample.size, betas, Omegas) {
+    dimX <- mapply(nrow, betas)
+    dimF <- mapply(ncol, betas)
+
+    # generate the response (the sample axis is the last axis)
+    y <- runif(sample.size, -1, 1)
+    F <- aperm(array(c(
+        sinpi(y), -cospi(y),
+        cospi(y), sinpi(y)
+    ), dim = c(length(y), 2, 2)), c(2, 3, 1))
+
+    Omega <- Reduce(kronecker, rev(Omegas))
+
+    X <- apply(F, 3, function(Fi) {
+        dim(Fi) <- dimF
+        # per-observation natural parameter matrix for the Ising sampler
+        params <- diag(as.vector(mlm(Fi, betas))) + Omega
+        tensorPredictors::ising_sample(1, params)
+    })
+    dim(X) <- c(dimX, sample.size)
+
+    list(X = X, F = F, y = y, sample.axis = 3)
+}
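+
+# A minimal brute-force sketch, for illustration only, of the distribution
+# assumed to underlie `tensorPredictors::ising_sample`: binary vectors `x`
+# with `P(X = x)` proportional to `exp(x' params x)`, where the diagonal of
+# `params` carries the linear effects and the off-diagonal entries the
+# pairwise interactions. (an assumption about the convention, not the package
+# implementation; enumerating all states is only feasible for tiny `p`)
+# ising_pmf_sketch <- function(params) {
+#     p <- nrow(params)
+#     configs <- as.matrix(expand.grid(rep(list(0:1), p)))   # all 2^p states
+#     unnorm <- apply(configs, 1, function(x) exp(sum(x * (params %*% x))))
+#     unnorm / sum(unnorm)                                   # normalize
+# }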
+
+# # has been run once with the initial seed
+# lpca.hyper.param <- local({
+#     c(X, F, y, sample.axis) %<-% sample.data(1e3, betas, Omegas)
+#     vecX <- mat(X, sample.axis)
+#     CV <- cv.lpca(vecX, ks = prod(dimF), ms = seq(1, 30, by = 1))
+#     # plot(CV)
+#     as.numeric(colnames(CV))[which.min(CV)]
+# })
+# set.seed(seed, "Mersenne-Twister", "Inversion", "Rejection")
+lpca.hyper.param <- 23
+
+
+# Create a CSV logger to write simulation results to
+log.file <- paste(base.name, "csv", sep = ".")
+logger <- CSV.logger(
+    file.name = log.file,
+    header = c("sample.size", "rep", outer(
+        c("time", "dist.subspace", "dist.projection"),  # < measures, v methods
+        c("gmlm", "tnormal", "pca", "hopca", "lpca", "clpca", "tsir", "mgcca"),
+        paste, sep = "."
+    ))
+)
+
+
+### for each sample size
+for (sample.size in sample.sizes) {
+    # repeat every simulation
+    for (rep in seq_len(reps)) {
+        # Sample training data
+        c(X, F, y, sample.axis) %<-% sample.data(sample.size, betas, Omegas)
+
+        # start timing for reporting
+        start.timer()
+
+        # Fit the different models. Wrapped in a try-catch block to ensure the
+        # simulation continues: if an error occurs, print an error message and
+        # continue with the next replication.
+        try.catch.block <- tryCatch({
+            time.gmlm <- system.time(
+                fit.gmlm <- gmlm_ising(X, F, y, sample.axis = sample.axis)
+            )["user.self"]
+            time.tnormal <- -1  # fitted as part of the Ising GMLM (timing not relevant here)
+            time.pca <- system.time(
+                fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(dimF))
+            )["user.self"]
+            time.hopca <- system.time(
+                fit.hopca <- HOPCA(X, npc = dimF, sample.axis = sample.axis)
+            )["user.self"]
+            time.lpca <- system.time(
+                fit.lpca <- logisticPCA(mat(X, sample.axis), k = prod(dimF),
+                    m = lpca.hyper.param)
+            )["user.self"]
+            time.clpca <- system.time(
+                fit.clpca <- convexLogisticPCA(mat(X, sample.axis), k = prod(dimF),
+                    m = lpca.hyper.param)
+            )["user.self"]
+            time.tsir <- system.time(
+                fit.tsir <- TSIR(X, y, dimF, sample.axis = sample.axis)
+            )["user.self"]
+            # `mgcca` expects the first axis to be the sample axis
+            X1 <- aperm(X, c(sample.axis, seq_along(dim(X))[-sample.axis]))
+            F1 <- cbind(sinpi(y), cospi(y))
+            time.mgcca <- system.time(
+                fit.mgcca <- mgcca(list(X1, F1), ncomp = c(prod(dimF), 1),
+                    quiet = TRUE, scheme = "factorial")
+            )["user.self"]
+        }, error = print)
+
+        # Get the elapsed time since the last timer start and the accumulated
+        # total time (_not_ a precise timing, only to get an idea)
+        c(elapsed, total.time) %<-% end.timer()
+
+        # Skip the comparison in case of an error in any fitting routine
+        if (inherits(try.catch.block, "error")) { next }
+
+        # Extract the estimated reduction matrices
+        B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(betas)))
+        B.tnormal <- with(attr(fit.gmlm, "tensor_normal"), Reduce(`%x%`, rev(betas)))
+        B.pca <- fit.pca$rotation
+        B.hopca <- Reduce(`%x%`, rev(fit.hopca))
+        B.lpca <- fit.lpca$U
+        B.clpca <- fit.clpca$U
+        B.tsir <- Reduce(`%x%`, rev(fit.tsir))
+        B.mgcca <- fit.mgcca$astar[[1]]
+
+        # Subspace Distances: Normalized `|| P_A - P_B ||_F` where
+        # `P_A = A (A' A)^-1 A'` and the normalization ensures that, with
+        # respect to the dimensions of `A, B`, the subspace distance is in the
+        # range `[0, 1]`.
+        dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
+        dist.subspace.tnormal <- dist.subspace(B.true, B.tnormal, normalize = TRUE)
+        dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE)
+        dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
+        dist.subspace.lpca <- dist.subspace(B.true, B.lpca, normalize = TRUE)
+        dist.subspace.clpca <- dist.subspace(B.true, B.clpca, normalize = TRUE)
+        dist.subspace.tsir <- dist.subspace(B.true, B.tsir, normalize = TRUE)
+        dist.subspace.mgcca <- dist.subspace(B.true, B.mgcca, normalize = TRUE)
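+
+        # A minimal sketch of what `dist.subspace` is assumed to compute (an
+        # assumption for illustration, not the package implementation): the
+        # Frobenius norm of the difference of the orthogonal projections onto
+        # `span(A)` and `span(B)`, divided by `sqrt(q_A + q_B)` (the assumed
+        # normalization) so that the value lies in `[0, 1]`.
+        # dist.subspace_sketch <- function(A, B) {
+        #     Q.A <- qr.Q(qr(A)); Q.B <- qr.Q(qr(B))          # orthonormal bases
+        #     P.A <- tcrossprod(Q.A); P.B <- tcrossprod(Q.B)  # projections
+        #     norm(P.A - P.B, "F") / sqrt(ncol(Q.A) + ncol(Q.B))
+        # }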
+
+        # Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`.
+        dist.projection.gmlm <- dist.projection(B.true, B.gmlm)
+        dist.projection.tnormal <- dist.projection(B.true, B.tnormal)
+        dist.projection.pca <- dist.projection(B.true, B.pca)
+        dist.projection.hopca <- dist.projection(B.true, B.hopca)
+        dist.projection.lpca <- dist.projection(B.true, B.lpca)
+        dist.projection.clpca <- dist.projection(B.true, B.clpca)
+        dist.projection.tsir <- dist.projection(B.true, B.tsir)
+        dist.projection.mgcca <- dist.projection(B.true, B.mgcca)
+
+        # Call the CSV logger, writing the results to file
+        logger()
+
+        # print progress
+        cat(sprintf("sample size (%d): %d/%d - rep: %d/%d - elapsed: %.1f [s], total: %.0f [s]\n",
+            sample.size, which(sample.size == sample.sizes),
+            length(sample.sizes), rep, reps, elapsed, total.time))
+    }
+}
+
+
+### read simulation results and generate plots
+if (!interactive()) { pdf(file = paste(base.name, "pdf", sep = ".")) }
+
+sim <- read.csv(log.file)
+
+
+plot.sim(sim, "dist.subspace", main = "Subspace Distance",
+    xlab = "Sample Size", ylab = "Distance")
+
+plot.sim(sim, "dist.projection", main = "Projection Distance",
+    xlab = "Sample Size", ylab = "Distance")
+
+plot.sim(sim, "time", main = "Runtime",
+    xlab = "Sample Size", ylab = "Time [s]",
+    ylim = c(0, max(sim[startsWith(names(sim), "time")])))
diff --git a/sim/sim_2c_ising.R b/sim/sim_2c_ising.R
new file mode 100644
index 0000000..66c70d7
--- /dev/null
+++ b/sim/sim_2c_ising.R
@@ -0,0 +1,195 @@
+library(tensorPredictors)
+library(logisticPCA)
+# library(RGCCA)
+# Use a modified version of `RGCCA` instead.
+# Reasons (on Ubuntu 22.04 LTS):
+# - compatible with `Rscript`
+# - about 4 times faster for small problems
+# Changes:
+# - Run in main thread, avoid `parallel::makeCluster` if `n_cores == 1`
+#   (file "R/mgccak.R", lines 81:103)
+# - added `Encoding: UTF-8`
+#   (file "DESCRIPTION")
+suppressWarnings({
+    devtools::load_all("~/Work/tensorPredictors/References/Software/TGCCA-modified", export_all = FALSE)
+})
+
+setwd("~/Work/tensorPredictors/sim/")
+base.name <- format(Sys.time(), "sim_2c_ising-%Y%m%dT%H%M")
+
+# Source utility functions used in most simulations (extracted for convenience)
+source("./sim_utils.R")
+
+# Set the PRNG seed for reproducibility
+# Note: `0x` is the hex number prefix and the trailing `L` stands for "long",
+# which is `R`'s way of indicating an integer.
+set.seed(seed <- 0x2cL, "Mersenne-Twister", "Inversion", "Rejection")
+
+
+reps <- 100                                 # number of simulation replications
+sample.sizes <- c(100, 200, 300, 500, 750)  # sample sizes `n`
+dimX <- c(2, 3)                             # predictor `X` dimension
+dimF <- c(2, 2)                             # dimension of the "function" `F(y)` of the response `y`
+
+# rank-1 betas: only the first row of `beta_1` and the second row of `beta_2`
+# are non-zero
+betas <- list(
+    `[<-`(matrix(0, dimX[1], dimF[1]), 1, , c(1, 1)),
+    `[<-`(matrix(0, dimX[2], dimF[2]), 2, , c(1, -1))
+)
+Omegas <- list(toeplitz(c(0, -2)), toeplitz(seq(1, 0, by = -0.5)))
+
+# compute the true (full) model parameter to compare estimates against
+B.true <- as.matrix(as.numeric((1:6) == 3))
+
+# define projections onto rank-1 matrices for the betas
+proj.betas <- Map(.projRank, rep(1L, length(betas)))
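+
+# A minimal sketch of the assumed behavior of `.projRank` for rank 1 (an
+# assumption for illustration, not the `tensorPredictors` implementation):
+# map a matrix to its best rank-1 approximation in the Frobenius norm, that
+# is, its leading singular triplet.
+# projRank1_sketch <- function(B) {
+#     udv <- svd(B, nu = 1, nv = 1)
+#     udv$d[1] * tcrossprod(udv$u, udv$v)    # sigma_1 * u_1 v_1'
+# }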
+
+# data sampling routine
+sample.data <- function(sample.size, betas, Omegas) {
+    dimX <- mapply(nrow, betas)
+    dimF <- mapply(ncol, betas)
+
+    # generate the response (the sample axis is the last axis)
+    y <- runif(sample.size, -1, 1)
+    F <- aperm(array(c(
+        sinpi(y), -cospi(y),
+        cospi(y), sinpi(y)
+    ), dim = c(length(y), 2, 2)), c(2, 3, 1))
+
+    Omega <- Reduce(kronecker, rev(Omegas))
+
+    X <- apply(F, 3, function(Fi) {
+        dim(Fi) <- dimF
+        params <- diag(as.vector(mlm(Fi, betas))) + Omega
+        tensorPredictors::ising_sample(1, params)
+    })
+    dim(X) <- c(dimX, sample.size)
+
+    list(X = X, F = F, y = y, sample.axis = 3)
+}
+
+# # has been run once with the initial seed
+# lpca.hyper.param <- local({
+#     c(X, F, y, sample.axis) %<-% sample.data(1e3, betas, Omegas)
+#     vecX <- mat(X, sample.axis)
+#     CV <- cv.lpca(vecX, ks = prod(dimF), ms = seq(1, 30, by = 1))
+#     # plot(CV)
+#     as.numeric(colnames(CV))[which.min(CV)]
+# })
+# set.seed(seed, "Mersenne-Twister", "Inversion", "Rejection")
+lpca.hyper.param <- 26
+
+
+# Create a CSV logger to write simulation results to
+log.file <- paste(base.name, "csv", sep = ".")
+logger <- CSV.logger(
+    file.name = log.file,
+    header = c("sample.size", "rep", outer(
+        c("time", "dist.subspace"),  # < measures, v methods
+        c("gmlm", "tnormal", "pca", "hopca", "lpca", "clpca", "tsir", "mgcca"),
+        paste, sep = "."
+    ))
+)
+
+
+### for each sample size
+for (sample.size in sample.sizes) {
+    # repeat every simulation
+    for (rep in seq_len(reps)) {
+        # Sample training data
+        c(X, F, y, sample.axis) %<-% sample.data(sample.size, betas, Omegas)
+
+        # start timing for reporting
+        start.timer()
+
+        # Fit the different models. Wrapped in a try-catch block to ensure the
+        # simulation continues: if an error occurs, print an error message and
+        # continue with the next replication.
+        try.catch.block <- tryCatch({
+            time.gmlm <- system.time(
+                fit.gmlm <- gmlm_ising(X, F, y, sample.axis = sample.axis,
+                    proj.betas = proj.betas)
+            )["user.self"]
+            time.tnormal <- -1  # fitted as part of the Ising GMLM (timing not relevant here)
+            time.pca <- system.time(
+                fit.pca <- prcomp(mat(X, sample.axis), rank. = 1L)
+            )["user.self"]
+            time.hopca <- system.time(
+                fit.hopca <- HOPCA(X, npc = c(1L, 1L), sample.axis = sample.axis)
+            )["user.self"]
+            time.lpca <- system.time(
+                fit.lpca <- logisticPCA(mat(X, sample.axis), k = 1L,
+                    m = lpca.hyper.param)
+            )["user.self"]
+            time.clpca <- system.time(
+                fit.clpca <- convexLogisticPCA(mat(X, sample.axis), k = 1L,
+                    m = lpca.hyper.param)
+            )["user.self"]
+            time.tsir <- system.time(
+                fit.tsir <- TSIR(X, y, d = c(1L, 1L), sample.axis = sample.axis)
+            )["user.self"]
+            # `mgcca` expects the first axis to be the sample axis
+            X1 <- aperm(X, c(sample.axis, seq_along(dim(X))[-sample.axis]))
+            F1 <- cbind(sinpi(y), cospi(y))
+            time.mgcca <- system.time(
+                fit.mgcca <- mgcca(list(X1, F1), ncomp = c(1L, 1L),
+                    quiet = TRUE, scheme = "factorial")
+            )["user.self"]
+        }, error = print)
+
+        # Get the elapsed time since the last timer start and the accumulated
+        # total time (_not_ a precise timing, only to get an idea)
+        c(elapsed, total.time) %<-% end.timer()
+
+        # Skip the comparison in case of an error in any fitting routine
+        if (inherits(try.catch.block, "error")) { next }
+
+        # Extract the estimated reduction matrices; the true reduction is
+        # rank 1, so only the leading direction of an orthonormal basis is
+        # kept for the GMLM estimates.
+        B.gmlm <- qr.Q(qr(with(fit.gmlm, Reduce(`%x%`, rev(betas)))))[, 1L, drop = FALSE]
+        B.tnormal <- qr.Q(qr(with(attr(fit.gmlm, "tensor_normal"), Reduce(`%x%`, rev(betas)))))[, 1L, drop = FALSE]
+        B.pca <- fit.pca$rotation
+        B.hopca <- Reduce(`%x%`, rev(fit.hopca))
+        B.lpca <- fit.lpca$U
+        B.clpca <- fit.clpca$U
+        B.tsir <- Reduce(`%x%`, rev(fit.tsir))
+        B.mgcca <- fit.mgcca$astar[[1]]
+
+        # Subspace Distances: Normalized `|| P_A - P_B ||_F` where
+        # `P_A = A (A' A)^-1 A'` and the normalization ensures that, with
+        # respect to the dimensions of `A, B`, the subspace distance is in the
+        # range `[0, 1]`.
+        dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
+        dist.subspace.tnormal <- dist.subspace(B.true, B.tnormal, normalize = TRUE)
+        dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE)
+        dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
+        dist.subspace.lpca <- dist.subspace(B.true, B.lpca, normalize = TRUE)
+        dist.subspace.clpca <- dist.subspace(B.true, B.clpca, normalize = TRUE)
+        dist.subspace.tsir <- dist.subspace(B.true, B.tsir, normalize = TRUE)
+        dist.subspace.mgcca <- dist.subspace(B.true, B.mgcca, normalize = TRUE)
+
+        # Call the CSV logger, writing the results to file
+        logger()
+
+        # print progress
+        cat(sprintf("sample size (%d): %d/%d - rep: %d/%d - elapsed: %.1f [s], total: %.0f [s]\n",
+            sample.size, which(sample.size == sample.sizes),
+            length(sample.sizes), rep, reps, elapsed, total.time))
+    }
+}
+
+
+### read simulation results and generate plots
+if (!interactive()) { pdf(file = paste(base.name, "pdf", sep = ".")) }
+
+sim <- read.csv(log.file)
+
+
+plot.sim(sim, "dist.subspace", main = "Subspace Distance",
+    xlab = "Sample Size", ylab = "Distance")
+
+# plot.sim(sim, "dist.projection", main = "Projection Distance",
+#     xlab = "Sample Size", ylab = "Distance")
+
+plot.sim(sim, "time", main = "Runtime",
+    xlab = "Sample Size", ylab = "Time [s]",
+    ylim = c(0, max(sim[startsWith(names(sim), "time")])))
diff --git a/sim/sim_2d_ising.R b/sim/sim_2d_ising.R
new file mode 100644
index 0000000..705f22e
--- /dev/null
+++ b/sim/sim_2d_ising.R
@@ -0,0 +1,213 @@
+library(tensorPredictors)
+library(logisticPCA)
+# library(RGCCA)
+# Use a modified version of `RGCCA` instead.
+# Reasons (on Ubuntu 22.04 LTS):
+# - compatible with `Rscript`
+# - about 4 times faster for small problems
+# Changes:
+# - Run in main thread, avoid `parallel::makeCluster` if `n_cores == 1`
+#   (file "R/mgccak.R", lines 81:103)
+# - added `Encoding: UTF-8`
+#   (file "DESCRIPTION")
+suppressWarnings({
+    devtools::load_all("~/Work/tensorPredictors/References/Software/TGCCA-modified", export_all = FALSE)
+})
+
+setwd("~/Work/tensorPredictors/sim/")
+base.name <- format(Sys.time(), "sim_2d_ising-%Y%m%dT%H%M")
+
+# Source utility functions used in most simulations (extracted for convenience)
+source("./sim_utils.R")
+
+# Set the PRNG seed for reproducibility
+# Note: `0x` is the hex number prefix and the trailing `L` stands for "long",
+# which is `R`'s way of indicating an integer.
+set.seed(seed <- 0x2dL, "Mersenne-Twister", "Inversion", "Rejection")
+
+
+reps <- 100                                 # number of simulation replications
+sample.sizes <- c(100, 200, 300, 500, 750)  # sample sizes `n`
+dimX <- c(2, 3)                             # predictor `X` dimension
+dimF <- c(2, 2)                             # dimension of the "function" `F(y)` of the response `y`
+
+betas <- Map(diag, 1, dimX, dimF)
+# # All identical couplings with log odds of 1, that is approx
+# # `P(X_i = 1, X_j = 1 | X_-i,-j = 0) ~ 3 / 4`
+# Omegas <- Map(function(dim) `diag<-`(matrix(1, dim, dim), 0), dimX)
+Omegas <- list(
+    `diag<-`(matrix(0.5, dimX[1], dimX[1]), 0),
+    diag(dimX[2])
+)
+
+# compute the true (full) model parameter to compare estimates against
+B.true <- Reduce(`%x%`, rev(betas))
+
+# Build projections onto the non-zero pattern of each `Omega` (all masked
+# elements equal, every other element zero)
+# proj.Omegas <- Map(function(Omega) {
+#     proj <- as.vector(Omega) %*% pinv(as.vector(Omega))
+#     function(Omega) {
+#         matrix(proj %*% as.vector(Omega), nrow = nrow(Omega))
+#     }
+# }, Omegas)
+proj.Omegas <- Map(.projMaskedMean, Map(as.logical, Omegas))
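+
+# A minimal sketch of the assumed behavior of `.projMaskedMean` (an
+# assumption for illustration, not the `tensorPredictors` implementation):
+# given a logical mask, replace every masked entry by the mean over the
+# masked entries and zero out the rest, which maps any matrix onto the
+# one-dimensional span of the mask pattern.
+# projMaskedMean_sketch <- function(mask) {
+#     function(Omega) {
+#         proj <- array(0, dim = dim(Omega))
+#         proj[mask] <- mean(Omega[mask])
+#         proj
+#     }
+# }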
+
+# data sampling routine
+sample.data <- function(sample.size, betas, Omegas) {
+    dimX <- mapply(nrow, betas)
+    dimF <- mapply(ncol, betas)
+
+    # generate the response (the sample axis is the last axis)
+    y <- runif(sample.size, -1, 1)
+    F <- aperm(array(c(
+        sinpi(y), -cospi(y),
+        cospi(y), sinpi(y)
+    ), dim = c(length(y), 2, 2)), c(2, 3, 1))
+
+    Omega <- Reduce(kronecker, rev(Omegas))
+
+    X <- apply(F, 3, function(Fi) {
+        dim(Fi) <- dimF
+        params <- diag(as.vector(mlm(Fi, betas))) + Omega
+        tensorPredictors::ising_sample(1, params)
+    })
+    dim(X) <- c(dimX, sample.size)
+
+    list(X = X, F = F, y = y, sample.axis = 3)
+}
+
+# # has been run once with the initial seed
+# lpca.hyper.param <- local({
+#     c(X, F, y, sample.axis) %<-% sample.data(1e3, betas, Omegas)
+#     vecX <- mat(X, sample.axis)
+#     CV <- cv.lpca(vecX, ks = prod(dimF), ms = seq(1, 30, by = 1))
+#     # plot(CV)
+#     as.numeric(colnames(CV))[which.min(CV)]
+# })
+# set.seed(seed, "Mersenne-Twister", "Inversion", "Rejection")
+lpca.hyper.param <- 10
+
+
+# Create a CSV logger to write simulation results to
+log.file <- paste(base.name, "csv", sep = ".")
+logger <- CSV.logger(
+    file.name = log.file,
+    header = c("sample.size", "rep", outer(
+        c("time", "dist.subspace", "dist.projection"),  # < measures, v methods
+        c("gmlm", "tnormal", "pca", "hopca", "lpca", "clpca", "tsir", "mgcca"),
+        paste, sep = "."
+    ))
+)
+
+
+### for each sample size
+for (sample.size in sample.sizes) {
+    # repeat every simulation
+    for (rep in seq_len(reps)) {
+        # Sample training data
+        c(X, F, y, sample.axis) %<-% sample.data(sample.size, betas, Omegas)
+
+        # start timing for reporting
+        start.timer()
+
+        # Fit the different models. Wrapped in a try-catch block to ensure the
+        # simulation continues: if an error occurs, print an error message and
+        # continue with the next replication.
+        try.catch.block <- tryCatch({
+            time.gmlm <- system.time(
+                fit.gmlm <- gmlm_ising(X, F, y, sample.axis = sample.axis,
+                    proj.Omegas = proj.Omegas)
+            )["user.self"]
+            time.tnormal <- -1  # fitted as part of the Ising GMLM (timing not relevant here)
+            time.pca <- system.time(
+                fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(dimF))
+            )["user.self"]
+            time.hopca <- system.time(
+                fit.hopca <- HOPCA(X, npc = dimF, sample.axis = sample.axis)
+            )["user.self"]
+            time.lpca <- system.time(
+                fit.lpca <- logisticPCA(mat(X, sample.axis), k = prod(dimF),
+                    m = lpca.hyper.param)
+            )["user.self"]
+            time.clpca <- system.time(
+                fit.clpca <- convexLogisticPCA(mat(X, sample.axis), k = prod(dimF),
+                    m = lpca.hyper.param)
+            )["user.self"]
+            time.tsir <- system.time(
+                fit.tsir <- TSIR(X, y, d = dimF, sample.axis = sample.axis)
+            )["user.self"]
+            # `mgcca` expects the first axis to be the sample axis
+            X1 <- aperm(X, c(sample.axis, seq_along(dim(X))[-sample.axis]))
+            F1 <- cbind(sinpi(y), cospi(y))
+            time.mgcca <- system.time(
+                fit.mgcca <- mgcca(list(X1, F1), ncomp = c(prod(dimF), 1L),
+                    quiet = TRUE, scheme = "factorial")
+            )["user.self"]
+        }, error = print)
+
+        # Get the elapsed time since the last timer start and the accumulated
+        # total time (_not_ a precise timing, only to get an idea)
+        c(elapsed, total.time) %<-% end.timer()
+
+        # Skip the comparison in case of an error in any fitting routine
+        if (inherits(try.catch.block, "error")) { next }
+
+        # Extract the estimated reduction matrices
+        B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(betas)))
+        B.tnormal <- with(attr(fit.gmlm, "tensor_normal"), Reduce(`%x%`, rev(betas)))
+        B.pca <- fit.pca$rotation
+        B.hopca <- Reduce(`%x%`, rev(fit.hopca))
+        B.lpca <- fit.lpca$U
+        B.clpca <- fit.clpca$U
+        B.tsir <- Reduce(`%x%`, rev(fit.tsir))
+        B.mgcca <- fit.mgcca$astar[[1]]
+
+        # Subspace Distances: Normalized `|| P_A - P_B ||_F` where
+        # `P_A = A (A' A)^-1 A'` and the normalization ensures that, with
+        # respect to the dimensions of `A, B`, the subspace distance is in the
+        # range `[0, 1]`.
+        dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
+        dist.subspace.tnormal <- dist.subspace(B.true, B.tnormal, normalize = TRUE)
+        dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE)
+        dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
+        dist.subspace.lpca <- dist.subspace(B.true, B.lpca, normalize = TRUE)
+        dist.subspace.clpca <- dist.subspace(B.true, B.clpca, normalize = TRUE)
+        dist.subspace.tsir <- dist.subspace(B.true, B.tsir, normalize = TRUE)
+        dist.subspace.mgcca <- dist.subspace(B.true, B.mgcca, normalize = TRUE)
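+
+        # A minimal sketch of what `dist.projection` is assumed to compute
+        # (an assumption for illustration, not the package implementation):
+        # the spectral norm, i.e. the largest singular value, of the
+        # difference of the two orthogonal projections.
+        # dist.projection_sketch <- function(A, B) {
+        #     P.A <- tcrossprod(qr.Q(qr(A)))    # projection onto span(A)
+        #     P.B <- tcrossprod(qr.Q(qr(B)))    # projection onto span(B)
+        #     svd(P.A - P.B, nu = 0, nv = 0)$d[1]
+        # }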
+
+        # Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`.
+        dist.projection.gmlm <- dist.projection(B.true, B.gmlm)
+        dist.projection.tnormal <- dist.projection(B.true, B.tnormal)
+        dist.projection.pca <- dist.projection(B.true, B.pca)
+        dist.projection.hopca <- dist.projection(B.true, B.hopca)
+        dist.projection.lpca <- dist.projection(B.true, B.lpca)
+        dist.projection.clpca <- dist.projection(B.true, B.clpca)
+        dist.projection.tsir <- dist.projection(B.true, B.tsir)
+        dist.projection.mgcca <- dist.projection(B.true, B.mgcca)
+
+        # Call the CSV logger, writing the results to file
+        logger()
+
+        # print progress
+        cat(sprintf("sample size (%d): %d/%d - rep: %d/%d - elapsed: %.1f [s], total: %.0f [s]\n",
+            sample.size, which(sample.size == sample.sizes),
+            length(sample.sizes), rep, reps, elapsed, total.time))
+    }
+}
+
+
+### read simulation results and generate plots
+if (!interactive()) { pdf(file = paste(base.name, "pdf", sep = ".")) }
+
+sim <- read.csv(log.file)
+
+
+plot.sim(sim, "dist.subspace", main = "Subspace Distance",
+    xlab = "Sample Size", ylab = "Distance")
+
+# plot.sim(sim, "dist.projection", main = "Projection Distance",
+#     xlab = "Sample Size", ylab = "Distance")
+
+plot.sim(sim, "time", main = "Runtime",
+    xlab = "Sample Size", ylab = "Time [s]",
+    ylim = c(0, max(sim[startsWith(names(sim), "time")])))
diff --git a/sim/sim_2e_ising.R b/sim/sim_2e_ising.R
new file mode 100644
index 0000000..d6a05b4
--- /dev/null
+++ b/sim/sim_2e_ising.R
@@ -0,0 +1,244 @@
+library(tensorPredictors)
+library(logisticPCA)
+# library(RGCCA)
+# Use a modified version of `RGCCA` instead.
+# Reasons (on Ubuntu 22.04 LTS):
+# - compatible with `Rscript`
+# - about 4 times faster for small problems
+# Changes:
+# - Run in main thread, avoid `parallel::makeCluster` if `n_cores == 1`
+#   (file "R/mgccak.R", lines 81:103)
+# - added `Encoding: UTF-8`
+#   (file "DESCRIPTION")
+suppressWarnings({
+    devtools::load_all("~/Work/tensorPredictors/References/Software/TGCCA-modified", export_all = FALSE)
+})
+
+setwd("~/Work/tensorPredictors/sim/")
+base.name <- format(Sys.time(), "sim_2e_ising-%Y%m%dT%H%M")
+
+# Source utility functions used in most simulations (extracted for convenience)
+source("./sim_utils.R")
+
+# Set the PRNG seed for reproducibility
+# Note: `0x` is the hex number prefix and the trailing `L` stands for "long",
+# which is `R`'s way of indicating an integer.
+set.seed(seed <- 0x2eL, "Mersenne-Twister", "Inversion", "Rejection")
+
+
+reps <- 100                                 # number of simulation replications
+sample.sizes <- c(100, 200, 300, 500, 750)  # sample sizes `n`
+dimX <- c(5, 5, 5)                          # predictor `X` dimension
+dimF <- c(2, 2, 2)                          # dimension of the "function" `F(y)` of the response `y`
+
+# all-ones factor matrices (each of rank 1)
+betas <- Map(matrix, 1, dimX, dimF)
+
+# identical pairwise couplings of 0.5 with zero diagonals
+Omegas <- Map(function(p) `diag<-`(matrix(0.5, p, p), 0), dimX)
+
+# compute the true (full) model parameter to compare estimates against
+B.true <- Reduce(`%x%`, rev(betas))
+
+# Build projections onto the non-zero pattern of each `Omega` (all masked
+# elements equal, every other element zero)
+# proj.Omegas <- Map(function(Omega) {
+#     proj <- as.vector(Omega) %*% pinv(as.vector(Omega))
+#     function(Omega) {
+#         matrix(proj %*% as.vector(Omega), nrow = nrow(Omega))
+#     }
+# }, Omegas)
+proj.Omegas <- Map(.projMaskedMean, Map(as.logical, Omegas))
+
+# data sampling routine
+sample.data <- function(sample.size, betas, Omegas) {
+    dimX <- mapply(nrow, betas)
+    dimF <- mapply(ncol, betas)
+
+    # generate the response (the sample axis is the last axis)
+    y <- runif(sample.size, -1, 1)
+    F <- aperm(array(c(
+        +sinpi(y), +sinpi(2 * y),
+        +cospi(y), +cospi(2 * y),
+        -cospi(y), -cospi(2 * y),
+        +sinpi(y), +sinpi(2 * y)
+    ), dim = c(length(y), 2, 2, 2)), c(2, 3, 4, 1))
+
+    Omega <- Reduce(kronecker, rev(Omegas))
+
+    X <- apply(F, length(dim(F)), function(Fi) {
+        dim(Fi) <- dimF
+        params <- diag(as.vector(mlm(Fi, betas))) + Omega
+        tensorPredictors::ising_sample(1, params)
+    })
+    dim(X) <- c(dimX, sample.size)
+
+    # `X` is a 4-tensor here (three predictor modes plus the sample axis)
+    list(X = X, F = F, y = y, sample.axis = 4)
+}
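+
+# A minimal sketch of the multilinear product `mlm` used above (an assumption
+# about `tensorPredictors::mlm` for illustration), via the Kronecker identity
+# `vec(F x_1 b_1 x_2 ... x_r b_r) = (b_r %x% ... %x% b_1) vec(F)`:
+# mlm_sketch <- function(F, betas) {
+#     vecFx <- Reduce(`%x%`, rev(betas)) %*% as.vector(F)
+#     array(vecFx, dim = mapply(nrow, betas))
+# }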
+
+# # has been run once with the initial seed
+# lpca.hyper.param <- local({
+#     c(X, F, y, sample.axis) %<-% sample.data(1e3, betas, Omegas)
+#     vecX <- mat(X, sample.axis)
+#     CV <- cv.lpca(vecX, ks = prod(dimF), ms = seq(1, 30, by = 1))
+#     # plot(CV)
+#     as.numeric(colnames(CV))[which.min(CV)]
+# })
+# set.seed(seed, "Mersenne-Twister", "Inversion", "Rejection")
+lpca.hyper.param <- 10
+
+
+# Create a CSV logger to write simulation results to
+log.file <- paste(base.name, "csv", sep = ".")
+logger <- CSV.logger(
+    file.name = log.file,
+    header = c("sample.size", "rep", outer(
+        c("time", "dist.subspace", "dist.projection"),  # < measures, v methods
+        c("gmlm", "tnormal", "pca", "hopca", "lpca", "clpca", "tsir", "mgcca"),
+        paste, sep = "."
+    ))
+)
+
+
+### for each sample size
+for (sample.size in sample.sizes) {
+    # repeat every simulation
+    for (rep in seq_len(reps)) {
+        # Sample training data
+        c(X, F, y, sample.axis) %<-% sample.data(sample.size, betas, Omegas)
+
+        # start timing for reporting
+        start.timer()
+
+        # Fit the different models. Wrapped in a try-catch block to ensure the
+        # simulation continues: if an error occurs, print an error message and
+        # continue with the next replication.
+        try.catch.block <- tryCatch({
+            time.gmlm <- system.time(
+                fit.gmlm <- gmlm_ising(X, F, y, sample.axis = sample.axis,
+                    proj.Omegas = proj.Omegas)
+            )["user.self"]
+            time.tnormal <- -1  # fitted as part of the Ising GMLM (timing not relevant here)
+            time.pca <- system.time(
+                fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(dimF))
+            )["user.self"]
+            time.hopca <- system.time(
+                fit.hopca <- HOPCA(X, npc = dimF, sample.axis = sample.axis)
+            )["user.self"]
+            time.lpca <- system.time(
+                fit.lpca <- logisticPCA(mat(X, sample.axis), k = prod(dimF),
+                    m = lpca.hyper.param)
+            )["user.self"]
+            time.clpca <- system.time(
+                fit.clpca <- convexLogisticPCA(mat(X, sample.axis), k = prod(dimF),
+                    m = lpca.hyper.param)
+            )["user.self"]
+            time.tsir <- system.time(
+                fit.tsir <- TSIR(X, y, d = dimF, sample.axis = sample.axis)
+            )["user.self"]
+            # `mgcca` expects the first axis to be the sample axis
+            X1 <- aperm(X, c(sample.axis, seq_along(dim(X))[-sample.axis]))
+            F1 <- cbind(sinpi(y), cospi(y))
+            time.mgcca <- system.time(
+                fit.mgcca <- mgcca(list(X1, F1), ncomp = c(prod(dimF), 1L),
+                    quiet = TRUE, scheme = "factorial")
+            )["user.self"]
+        }, error = print)
+
+        # Get the elapsed time since the last timer start and the accumulated
+        # total time (_not_ a precise timing, only to get an idea)
+        c(elapsed, total.time) %<-% end.timer()
+
+        # Skip the comparison in case of an error in any fitting routine
+        if (inherits(try.catch.block, "error")) { next }
+
+        # Extract the estimated reduction matrices
+        B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(betas)))
+        B.tnormal <- with(attr(fit.gmlm, "tensor_normal"), Reduce(`%x%`, rev(betas)))
+        B.pca <- fit.pca$rotation
+        B.hopca <- Reduce(`%x%`, rev(fit.hopca))
+        B.lpca <- fit.lpca$U
+        B.clpca <- fit.clpca$U
+        B.tsir <- Reduce(`%x%`, rev(fit.tsir))
+        B.mgcca <- fit.mgcca$astar[[1]]
+
+        # Subspace Distances: Normalized `|| P_A - P_B ||_F` where
+        # `P_A = A (A' A)^-1 A'` and the normalization ensures that, with
+        # respect to the dimensions of `A, B`, the subspace distance is in the
+        # range `[0, 1]`.
+        dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
+        dist.subspace.tnormal <- dist.subspace(B.true, B.tnormal, normalize = TRUE)
+        dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE)
+        dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
+        dist.subspace.lpca <- dist.subspace(B.true, B.lpca, normalize = TRUE)
+        dist.subspace.clpca <- dist.subspace(B.true, B.clpca, normalize = TRUE)
+        dist.subspace.tsir <- dist.subspace(B.true, B.tsir, normalize = TRUE)
+        dist.subspace.mgcca <- dist.subspace(B.true, B.mgcca, normalize = TRUE)
+
+        # Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`.
+        dist.projection.gmlm <- dist.projection(B.true, B.gmlm)
+        dist.projection.tnormal <- dist.projection(B.true, B.tnormal)
+        dist.projection.pca <- dist.projection(B.true, B.pca)
+        dist.projection.hopca <- dist.projection(B.true, B.hopca)
+        dist.projection.lpca <- dist.projection(B.true, B.lpca)
+        dist.projection.clpca <- dist.projection(B.true, B.clpca)
+        dist.projection.tsir <- dist.projection(B.true, B.tsir)
+        dist.projection.mgcca <- dist.projection(B.true, B.mgcca)
+
+        # Call the CSV logger, writing the results to file
+        logger()
+
+        # print progress
+        cat(sprintf("sample size (%d): %d/%d - rep: %d/%d - elapsed: %.1f [s], total: %.0f [s]\n",
+            sample.size, which(sample.size == sample.sizes),
+            length(sample.sizes), rep, reps, elapsed, total.time))
+    }
+}
+
+
+### read simulation results and generate plots
+if (!interactive()) { pdf(file = paste(base.name, "pdf", sep = ".")) }
+
+sim <- read.csv(log.file)
+
+
+plot.sim(sim, "dist.subspace", main = "Subspace Distance",
+    xlab = "Sample Size", ylab = "Distance")
+
+# plot.sim(sim, "dist.projection", main = "Projection Distance",
+#     xlab = "Sample Size", ylab = "Distance")
+
+plot.sim(sim, "time", main = "Runtime",
+    xlab = "Sample Size", ylab = "Time [s]",
+    ylim = c(0, max(sim[startsWith(names(sim), "time")])))
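+
+# Optional quick check (a sketch, assuming the column names written by the
+# logger above): mean subspace distance per sample size, as a textual
+# complement to the plots.
+# aggregate(sim[startsWith(names(sim), "dist.subspace")],
+#     by = sim["sample.size"], FUN = mean)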