# library(tensorPredictors) devtools::load_all("~/Work/tensorPredictors/tensorPredictors", export_all = FALSE) library(logisticPCA) # library(RGCCA) # Use modified version of `RGCCA` # Reasons (on Ubuntu 22.04 LTS): # - compatible with `Rscript` # - about 4 times faster for small problems # Changes: # - Run in main thread, avoid `parallel::makeCluster` if `n_cores == 1` # (file "R/mgccak.R" lines 81:103) # - added `Encoding: UTF-8` # (file "DESCRIPTION") suppressWarnings({ devtools::load_all("~/Work/tensorPredictors/References/Software/TGCCA-modified", export_all = FALSE) }) setwd("~/Work/tensorPredictors/sim/") base.name <- format(Sys.time(), "sim_2a_ising-%Y%m%dT%H%M") # Source utility function used in most simulations (extracted for convenience) source("./sim_utils.R") # Set PRNG seed for reproducability # Note: `0x` is the HEX number prefix and the trailing `L` stands for "long" # which is `R`s way if indicating an integer. set.seed(0x2aL, "Mersenne-Twister", "Inversion", "Rejection") reps <- 100 # number of simulation replications sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n` dimX <- c(2, 3) # predictor `X` dimension dimF <- rep(1, length(dimX)) # "function" `F(y)` of responce `y` dimension betas <- Map(diag, 1, dimX, dimF) Omegas <- list(toeplitz(c(0, -2)), toeplitz(seq(1, 0, by = -0.5))) # data sampling routine sample.data <- function(sample.size, betas, Omegas) { dimX <- mapply(nrow, betas) dimF <- mapply(ncol, betas) # generate response (sample axis is last axis) y <- runif(prod(sample.size, dimF), -2, 2) F <- array(y, dim = c(dimF, sample.size)) # ~ U[-1, 1] Omega <- Reduce(kronecker, rev(Omegas)) X <- apply(F, length(dim(F)), function(Fi) { dim(Fi) <- dimF params <- diag(as.vector(mlm(Fi, betas))) + Omega tensorPredictors::ising_sample(1, params) }) dim(X) <- c(dimX, sample.size) list(X = X, F = F, y = y, sample.axis = length(dim(X))) } lpca.hyper.param <- local({ c(X, F, y, sample.axis) %<-% sample.data(1e3, betas, Omegas) vecX <- mat(X, sample.axis) CV <- cv.lpca(vecX, ks = prod(dimF), ms = seq(1, 20, by = 0.5)) # plot(CV) as.numeric(colnames(CV))[which.min(CV)] }) # Create a CSV logger to write simulation results to log.file <- paste(base.name, "csv", sep = ".") logger <- CSV.logger( file.name = log.file, header = c("sample.size", "rep", outer( c("time", "dist.subspace"), # measures c("gmlm", "tnormal", "pca", "hopca", "lpca", "clpca", "tsir", "mgcca"), # methods paste, sep = "." )) ) # compute true (full) model parameters to compair estimates against B.true <- Reduce(`%x%`, rev(betas)) ### for each sample size for (sample.size in sample.sizes) { # repeate every simulation for (rep in seq_len(reps)) { # Sample training data c(X, F, y, sample.axis) %<-% sample.data(sample.size, betas, Omegas) # fit different models # Wrapped in try-catch clock to ensure the simulation continues, # if an error occures continue with nest resplication and log an error message try.catch.block <- tryCatch({ time.gmlm <- system.time( fit.gmlm <- gmlm_ising(X, F, sample.axis = sample.axis) )["user.self"] time.tnormal <- -1 # part of Ising gmlm (not relevent here) time.pca <- system.time( fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(dimF)) )["user.self"] time.hopca <- system.time( fit.hopca <- HOPCA(X, npc = dimF, sample.axis = sample.axis) )["user.self"] time.lpca <- system.time( fit.lpca <- logisticPCA(mat(X, sample.axis), k = prod(dimF), m = lpca.hyper.param) )["user.self"] time.clpca <- system.time( fit.clpca <- convexLogisticPCA(mat(X, sample.axis), k = prod(dimF), m = lpca.hyper.param) )["user.self"] time.tsir <- system.time( fit.tsir <- TSIR(X, y, dimF, sample.axis = sample.axis) )["user.self"] # `mgcca` expects the first axis to be the sample axis X1 <- aperm(X, c(sample.axis, seq_along(dim(X))[-sample.axis])) time.mgcca <- system.time( fit.mgcca <- mgcca( list(X1, y), # `drop` removes 1D axis quiet = TRUE, scheme = "factorial", ncomp = c(1, 1) ) )["user.self"] }, error = print) # Drop comparison in case any error (in any fitting routine) if (inherits(try.catch.block, "error")) { next } # Compute true reduction matrix B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(betas))) B.tnormal <- with(attr(fit.gmlm, "tensor_normal"), Reduce(`%x%`, rev(betas))) B.pca <- fit.pca$rotation B.hopca <- Reduce(`%x%`, rev(fit.hopca)) B.lpca <- fit.lpca$U B.clpca <- fit.clpca$U B.tsir <- Reduce(`%x%`, rev(fit.tsir)) B.mgcca <- fit.mgcca$astar[[1]] # Subspace Distances: Normalized `|| P_A - P_B ||_F` where # `P_A = A (A' A)^-1 A'` and the normalization means that with # respect to the dimensions of `A, B` the subspace distance is in the # range `[0, 1]`. dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE) dist.subspace.tnormal <- dist.subspace(B.true, B.tnormal, normalize = TRUE) dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE) dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE) dist.subspace.lpca <- dist.subspace(B.true, B.lpca, normalize = TRUE) dist.subspace.clpca <- dist.subspace(B.true, B.clpca, normalize = TRUE) dist.subspace.tsir <- dist.subspace(B.true, B.tsir, normalize = TRUE) dist.subspace.mgcca <- dist.subspace(B.true, B.mgcca, normalize = TRUE) # # Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`. # equiv to Subspace distance in this case # Call CSV logger writing results to file logger() # print progress cat(sprintf("sample size (%d): %d/%d - rep: %d/%d\n", sample.size, which(sample.size == sample.sizes), length(sample.sizes), rep, reps)) } } ### read simulation results generate plots if (!interactive()) { pdf(file = paste(base.name, "pdf", sep = ".")) } sim <- read.csv(log.file) plot.sim(sim, "dist.subspace", main = "Subspace Distance", xlab = "Sample Size", ylab = "Distance") # plot.sim(sim, "dist.projection", main = "Projection Distance", # xlab = "Sample Size", ylab = "Distance") plot.sim(sim, "time", main = "Runtime", xlab = "Sample Size", ylab = "Time [s]", ylim = c(0, 18)) # aggr <- aggregate(sim, list(sim$sample.size), sd) # stats <- aggr[, c(2, 5, 7, 9, 11, 13, 15, 17, 19)] # names(stats) <- Map(tail, strsplit(names(stats), ".", fixed = TRUE), 1) # round(stats * 100, 2) # sim <- sim[!startsWith(names(sim), "time")] # sim <- sim[names(sim) != "rep"] # names(sim) <- strsplit(names(stats), ".", fixed = TRUE) # (as.data.frame(Map(function(m, s) { # paste0(round(m, 2), " (", round(s, 2), ")") # }, # aggregate(sim, list(sim$size), mean), # aggregate(sim, list(sim$size), sd) # ))) # $n$ & gmlm & pca & hopca lpca & clpca & tsir & mgcca # 100 & 0.34 (0.14) & 0.90 (0.04) & 0.90 (0.05) 0.94 (0.09) & 1 0.91 (0.03) & 0.48 (0.19) & 0.55 (0.13) # 200 & 0.25 (0.11) & 0.90 (0.03) & 0.90 (0.03) 0.96 (0.07) & 2 0.91 (0.02) & 0.38 (0.16) & 0.53 (0.10) # 300 & 0.20 (0.09) & 0.89 (0.02) & 0.89 (0.02) 0.97 (0.06) & 3 0.91 (0.02) & 0.29 (0.13) & 0.51 (0.11) # 500 & 0.16 (0.07) & 0.90 (0.02) & 0.90 (0.02) 0.98 (0.01) & 4 0.91 (0.01) & 0.23 (0.10) & 0.50 (0.08) # 750 & 0.13 (0.05) & 0.90 (0.01) & 0.90 (0.01) 0.98 (0.02) & 5 0.91 (0.01) & 0.23 (0.08) & 0.53 (0.06) if (FALSE) { ################################################################################ ### Work In Progress ### ################################################################################ library(tensorPredictors) dimX <- c(3, 3, 3) dimF <- c(1, 1, 1) betas <- Map(diag, 1, dimX, dimF) Omegas <- rev(list( toeplitz(-1 * (seq_len(dimX[1]) == 2L)), toeplitz(seq(1, 0, len = dimX[2])), diag(dimX[3]) )) Omega <- Reduce(kronecker, rev(Omegas)) layout(matrix(c( 1, 3, 4, 2, 3, 5, 6, 6, 6 ), nrow = 3, byrow = TRUE), heights = c(8, 8, 1)) `E(X |` <- function(Y) { array(diag(ising_m2(diag(as.vector(mlm(array(Y, dimF), betas))) + Omega)), dimX) } `E(X |`(Y = -2) `E(X |`(Y = +2) col <- hcl.colors(256, "Blue-Red 3", rev = FALSE) matrixImage(`E(X |`(Y = -2), main = "E[X | Y = -2]", zlim = c(0, 1), col = col) matrixImage(`E(X |`(Y = -1), main = "E[X | Y = -1]", zlim = c(0, 1), col = col) matrixImage(`E(X |`(Y = 0), main = "E[X | Y = 0]", zlim = c(0, 1), col = col) matrixImage(`E(X |`(Y = +1), main = "E[X | Y = +1]", zlim = c(0, 1), col = col) matrixImage(`E(X |`(Y = +2), main = "E[X | Y = +2]", zlim = c(0, 1), col = col) { restor.par <- par(mar = c(1.1, 2.1, 0, 2.1)) plot(0:1, 0:1, type = "n", xlab = "", ylab = "", axes = FALSE) rasterImage(as.raster(matrix(col, nrow = 1)), 0, 0, 1, 1) mtext("0", side = 2, las = 1, line = -3) mtext("1", side = 4, las = 1, line = -3) par(restor.par) } sample.size <- 100 c(X, F, y, sample.axis) %<-% sample.data(sample.size, betas, Omegas) # Design matrix containing vectorized X's vecX <- mat(X, sample.axis) fit.gmlm <- gmlm_ising(X, F) fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(dimF)) fit.hopca <- HOPCA(X, npc = dimF, sample.axis = sample.axis) fit.tsir <- TSIR(X, y, dimF, sample.axis = sample.axis) fit.mgcca <- local({ X1 <- aperm(X, c(sample.axis, seq_along(dim(X))[-sample.axis])) F1 <- aperm(F, c(sample.axis, seq_along(dim(X))[-sample.axis])) mgcca( list(X1, drop(F1)), # `drop` removes 1D axis quiet = TRUE, scheme = "factorial", ncomp = rep(prod(dimF), 2) ) }) fit.lpca <- logisticPCA(vecX, k = prod(dimF), m = m) fit.clpca <- convexLogisticPCA(vecX, k = prod(dimF), m = m) B.gmlm <- Reduce(kronecker, rev(fit.gmlm$betas)) B.pca <- fit.pca$rotation B.hopca <- Reduce(`%x%`, rev(fit.hopca)) B.tsir <- Reduce(`%x%`, rev(fit.tsir)) B.mgcca <- fit.mgcca$astar[[1]] B.lpca <- fit.lpca$U B.clpca <- fit.clpca$U # B.lsvd <- ??? dist.subspace(B.true, B.gmlm, normalize = TRUE) dist.subspace(B.true, B.pca, normalize = TRUE) dist.subspace(B.true, B.hopca, normalize = TRUE) dist.subspace(B.true, B.tsir, normalize = TRUE) dist.subspace(B.true, B.mgcca, normalize = TRUE) dist.subspace(B.true, B.lpca, normalize = TRUE) dist.subspace(B.true, B.clpca, normalize = TRUE) ################################################################################ ### End - Work In Progress ### ################################################################################ }