316 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			R
		
	
	
	
	
	
			
		
		
	
	
			316 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			R
		
	
	
	
	
	
# library(tensorPredictors)
 | 
						|
devtools::load_all("~/Work/tensorPredictors/tensorPredictors", export_all = FALSE)
 | 
						|
 | 
						|
library(logisticPCA)
 | 
						|
# library(RGCCA)
 | 
						|
# Use modified version of `RGCCA`
 | 
						|
# Reasons (on Ubuntu 22.04 LTS):
 | 
						|
#   - compatible with `Rscript`
 | 
						|
#   - about 4 times faster for small problems
 | 
						|
# Changes:
 | 
						|
#   - Run in main thread, avoid `parallel::makeCluster` if `n_cores == 1`
 | 
						|
#     (file "R/mgccak.R" lines 81:103)
 | 
						|
#   - added `Encoding: UTF-8`
 | 
						|
#     (file "DESCRIPTION")
 | 
						|
suppressWarnings({
 | 
						|
    devtools::load_all("~/Work/tensorPredictors/References/Software/TGCCA-modified", export_all = FALSE)
 | 
						|
})
 | 
						|
 | 
						|
setwd("~/Work/tensorPredictors/sim/")
 | 
						|
base.name <- format(Sys.time(), "sim_2a_ising-%Y%m%dT%H%M")
 | 
						|
 | 
						|
# Source utility function used in most simulations (extracted for convenience)
 | 
						|
source("./sim_utils.R")
 | 
						|
 | 
						|
# Set PRNG seed for reproducability
 | 
						|
# Note: `0x` is the HEX number prefix and the trailing `L` stands for "long"
 | 
						|
# which is `R`s way if indicating an integer.
 | 
						|
set.seed(0x2aL, "Mersenne-Twister", "Inversion", "Rejection")
 | 
						|
 | 
						|
 | 
						|
reps <- 100                     # number of simulation replications
 | 
						|
sample.sizes <- c(100, 200, 300, 500, 750)  # sample sizes `n`
 | 
						|
dimX <- c(2, 3)              # predictor `X` dimension
 | 
						|
dimF <- rep(1, length(dimX))    # "function" `F(y)` of responce `y` dimension
 | 
						|
 | 
						|
betas <- Map(diag, 1, dimX, dimF)
 | 
						|
Omegas <- list(toeplitz(c(0, -2)), toeplitz(seq(1, 0, by = -0.5)))
 | 
						|
 | 
						|
 | 
						|
# data sampling routine
 | 
						|
sample.data <- function(sample.size, betas, Omegas) {
 | 
						|
    dimX <- mapply(nrow, betas)
 | 
						|
    dimF <- mapply(ncol, betas)
 | 
						|
 | 
						|
    # generate response (sample axis is last axis)
 | 
						|
    y <- runif(prod(sample.size, dimF), -2, 2)
 | 
						|
    F <- array(y, dim = c(dimF, sample.size))    # ~ U[-1, 1]
 | 
						|
 | 
						|
    Omega <- Reduce(kronecker, rev(Omegas))
 | 
						|
 | 
						|
    X <- apply(F, length(dim(F)), function(Fi) {
 | 
						|
        dim(Fi) <- dimF
 | 
						|
        params <- diag(as.vector(mlm(Fi, betas))) + Omega
 | 
						|
        tensorPredictors::ising_sample(1, params)
 | 
						|
    })
 | 
						|
    dim(X) <- c(dimX, sample.size)
 | 
						|
 | 
						|
    list(X = X, F = F, y = y, sample.axis = length(dim(X)))
 | 
						|
}
 | 
						|
 | 
						|
lpca.hyper.param <- local({
 | 
						|
    c(X, F, y, sample.axis) %<-% sample.data(1e3, betas, Omegas)
 | 
						|
    vecX <- mat(X, sample.axis)
 | 
						|
    CV <- cv.lpca(vecX, ks = prod(dimF), ms = seq(1, 20, by = 0.5))
 | 
						|
    # plot(CV)
 | 
						|
    as.numeric(colnames(CV))[which.min(CV)]
 | 
						|
})
 | 
						|
 | 
						|
 | 
						|
# Create a CSV logger to write simulation results to
 | 
						|
log.file <- paste(base.name, "csv", sep = ".")
 | 
						|
logger <- CSV.logger(
 | 
						|
    file.name = log.file,
 | 
						|
    header = c("sample.size", "rep", outer(
 | 
						|
        c("time", "dist.subspace"),                  # measures
 | 
						|
        c("gmlm", "tnormal", "pca", "hopca", "lpca", "clpca", "tsir", "mgcca"),    # methods
 | 
						|
        paste, sep = "."
 | 
						|
    ))
 | 
						|
)
 | 
						|
 | 
						|
 | 
						|
 | 
						|
 | 
						|
# compute true (full) model parameters to compair estimates against
 | 
						|
B.true <- Reduce(`%x%`, rev(betas))
 | 
						|
 | 
						|
### for each sample size
 | 
						|
for (sample.size in sample.sizes) {
 | 
						|
    # repeate every simulation
 | 
						|
    for (rep in seq_len(reps)) {
 | 
						|
        # Sample training data
 | 
						|
        c(X, F, y, sample.axis) %<-% sample.data(sample.size, betas, Omegas)
 | 
						|
 | 
						|
        # fit different models
 | 
						|
        # Wrapped in try-catch clock to ensure the simulation continues,
 | 
						|
        # if an error occures continue with nest resplication and log an error message
 | 
						|
        try.catch.block <- tryCatch({
 | 
						|
            time.gmlm <- system.time(
 | 
						|
                fit.gmlm <- gmlm_ising(X, F, sample.axis = sample.axis)
 | 
						|
            )["user.self"]
 | 
						|
            time.tnormal <- -1  # part of Ising gmlm (not relevent here)
 | 
						|
            time.pca <- system.time(
 | 
						|
                fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(dimF))
 | 
						|
            )["user.self"]
 | 
						|
            time.hopca <- system.time(
 | 
						|
                fit.hopca <- HOPCA(X, npc = dimF, sample.axis = sample.axis)
 | 
						|
            )["user.self"]
 | 
						|
            time.lpca <- system.time(
 | 
						|
                fit.lpca <- logisticPCA(mat(X, sample.axis), k = prod(dimF), m = lpca.hyper.param)
 | 
						|
            )["user.self"]
 | 
						|
            time.clpca <- system.time(
 | 
						|
                fit.clpca <- convexLogisticPCA(mat(X, sample.axis), k = prod(dimF), m = lpca.hyper.param)
 | 
						|
            )["user.self"]
 | 
						|
            time.tsir <- system.time(
 | 
						|
                fit.tsir <- TSIR(X, y, dimF, sample.axis = sample.axis)
 | 
						|
            )["user.self"]
 | 
						|
            # `mgcca` expects the first axis to be the sample axis
 | 
						|
            X1 <- aperm(X, c(sample.axis, seq_along(dim(X))[-sample.axis]))
 | 
						|
            time.mgcca <- system.time(
 | 
						|
                fit.mgcca <- mgcca(
 | 
						|
                    list(X1, y), # `drop` removes 1D axis
 | 
						|
                    quiet = TRUE,
 | 
						|
                    scheme = "factorial",
 | 
						|
                    ncomp = c(1, 1)
 | 
						|
                )
 | 
						|
            )["user.self"]
 | 
						|
        }, error = print)
 | 
						|
 | 
						|
        # Drop comparison in case any error (in any fitting routine)
 | 
						|
        if (inherits(try.catch.block, "error")) { next }
 | 
						|
 | 
						|
        # Compute true reduction matrix
 | 
						|
        B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(betas)))
 | 
						|
        B.tnormal <- with(attr(fit.gmlm, "tensor_normal"), Reduce(`%x%`, rev(betas)))
 | 
						|
        B.pca <- fit.pca$rotation
 | 
						|
        B.hopca <- Reduce(`%x%`, rev(fit.hopca))
 | 
						|
        B.lpca <- fit.lpca$U
 | 
						|
        B.clpca <- fit.clpca$U
 | 
						|
        B.tsir <- Reduce(`%x%`, rev(fit.tsir))
 | 
						|
        B.mgcca <- fit.mgcca$astar[[1]]
 | 
						|
 | 
						|
        # Subspace Distances: Normalized `|| P_A - P_B ||_F` where
 | 
						|
        #   `P_A = A (A' A)^-1 A'` and the normalization means that with
 | 
						|
        #   respect to the dimensions of `A, B` the subspace distance is in the
 | 
						|
        #   range `[0, 1]`.
 | 
						|
        dist.subspace.gmlm    <- dist.subspace(B.true, B.gmlm,    normalize = TRUE)
 | 
						|
        dist.subspace.tnormal <- dist.subspace(B.true, B.tnormal, normalize = TRUE)
 | 
						|
        dist.subspace.pca     <- dist.subspace(B.true, B.pca,     normalize = TRUE)
 | 
						|
        dist.subspace.hopca   <- dist.subspace(B.true, B.hopca,   normalize = TRUE)
 | 
						|
        dist.subspace.lpca    <- dist.subspace(B.true, B.lpca,    normalize = TRUE)
 | 
						|
        dist.subspace.clpca   <- dist.subspace(B.true, B.clpca,   normalize = TRUE)
 | 
						|
        dist.subspace.tsir    <- dist.subspace(B.true, B.tsir,    normalize = TRUE)
 | 
						|
        dist.subspace.mgcca   <- dist.subspace(B.true, B.mgcca,   normalize = TRUE)
 | 
						|
 | 
						|
        # # Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`.
 | 
						|
        # equiv to Subspace distance in this case
 | 
						|
 | 
						|
        # Call CSV logger writing results to file
 | 
						|
        logger()
 | 
						|
 | 
						|
        # print progress
 | 
						|
        cat(sprintf("sample size (%d): %d/%d - rep: %d/%d\n",
 | 
						|
            sample.size, which(sample.size == sample.sizes),
 | 
						|
            length(sample.sizes), rep, reps))
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
### read simulation results generate plots
 | 
						|
if (!interactive()) { pdf(file = paste(base.name, "pdf", sep = ".")) }
 | 
						|
 | 
						|
sim <- read.csv(log.file)
 | 
						|
 | 
						|
 | 
						|
plot.sim(sim, "dist.subspace", main = "Subspace Distance",
 | 
						|
    xlab = "Sample Size", ylab = "Distance")
 | 
						|
 | 
						|
# plot.sim(sim, "dist.projection", main = "Projection Distance",
 | 
						|
#     xlab = "Sample Size", ylab = "Distance")
 | 
						|
 | 
						|
plot.sim(sim, "time", main = "Runtime",
 | 
						|
    xlab = "Sample Size", ylab = "Time [s]", ylim = c(0, 18))
 | 
						|
 | 
						|
# aggr <- aggregate(sim, list(sim$sample.size), sd)
 | 
						|
# stats <- aggr[, c(2, 5, 7, 9, 11, 13, 15, 17, 19)]
 | 
						|
 | 
						|
# names(stats) <- Map(tail, strsplit(names(stats), ".", fixed = TRUE), 1)
 | 
						|
# round(stats * 100, 2)
 | 
						|
 | 
						|
# sim <- sim[!startsWith(names(sim), "time")]
 | 
						|
# sim <- sim[names(sim) != "rep"]
 | 
						|
# names(sim) <- strsplit(names(stats), ".", fixed = TRUE)
 | 
						|
# (as.data.frame(Map(function(m, s) {
 | 
						|
#     paste0(round(m, 2), " (", round(s, 2), ")")
 | 
						|
# },
 | 
						|
#     aggregate(sim, list(sim$size), mean),
 | 
						|
#     aggregate(sim, list(sim$size), sd)
 | 
						|
# )))
 | 
						|
 | 
						|
# $n$ &      gmlm   &       pca   &     hopca        lpca   &         clpca &        tsir &       mgcca
 | 
						|
# 100 & 0.34 (0.14) & 0.90 (0.04) & 0.90 (0.05) 0.94 (0.09) & 1 0.91 (0.03) & 0.48 (0.19) & 0.55 (0.13)
 | 
						|
# 200 & 0.25 (0.11) & 0.90 (0.03) & 0.90 (0.03) 0.96 (0.07) & 2 0.91 (0.02) & 0.38 (0.16) & 0.53 (0.10)
 | 
						|
# 300 & 0.20 (0.09) & 0.89 (0.02) & 0.89 (0.02) 0.97 (0.06) & 3 0.91 (0.02) & 0.29 (0.13) & 0.51 (0.11)
 | 
						|
# 500 & 0.16 (0.07) & 0.90 (0.02) & 0.90 (0.02) 0.98 (0.01) & 4 0.91 (0.01) & 0.23 (0.10) & 0.50 (0.08)
 | 
						|
# 750 & 0.13 (0.05) & 0.90 (0.01) & 0.90 (0.01) 0.98 (0.02) & 5 0.91 (0.01) & 0.23 (0.08) & 0.53 (0.06)
 | 
						|
 | 
						|
 | 
						|
 | 
						|
 | 
						|
 | 
						|
 | 
						|
 | 
						|
 | 
						|
 | 
						|
 | 
						|
 | 
						|
 | 
						|
if (FALSE) {
 | 
						|
################################################################################
 | 
						|
###                             Work In Progress                             ###
 | 
						|
################################################################################
 | 
						|
library(tensorPredictors)
 | 
						|
 | 
						|
dimX <- c(3, 3, 3)
 | 
						|
dimF <- c(1, 1, 1)
 | 
						|
 | 
						|
betas <- Map(diag, 1, dimX, dimF)
 | 
						|
Omegas <- rev(list(
 | 
						|
    toeplitz(-1 * (seq_len(dimX[1]) == 2L)),
 | 
						|
    toeplitz(seq(1, 0, len = dimX[2])),
 | 
						|
    diag(dimX[3])
 | 
						|
))
 | 
						|
Omega <- Reduce(kronecker, rev(Omegas))
 | 
						|
 | 
						|
layout(matrix(c(
 | 
						|
    1, 3, 4,
 | 
						|
    2, 3, 5,
 | 
						|
    6, 6, 6
 | 
						|
), nrow = 3, byrow = TRUE), heights = c(8, 8, 1))
 | 
						|
 | 
						|
 | 
						|
`E(X |` <- function(Y) {
 | 
						|
    array(diag(ising_m2(diag(as.vector(mlm(array(Y, dimF), betas))) + Omega)), dimX)
 | 
						|
}
 | 
						|
`E(X |`(Y = -2)
 | 
						|
`E(X |`(Y = +2)
 | 
						|
 | 
						|
col <- hcl.colors(256, "Blue-Red 3", rev = FALSE)
 | 
						|
matrixImage(`E(X |`(Y = -2), main = "E[X | Y = -2]", zlim = c(0, 1), col = col)
 | 
						|
matrixImage(`E(X |`(Y = -1), main = "E[X | Y = -1]", zlim = c(0, 1), col = col)
 | 
						|
matrixImage(`E(X |`(Y =  0), main = "E[X | Y =  0]", zlim = c(0, 1), col = col)
 | 
						|
matrixImage(`E(X |`(Y = +1), main = "E[X | Y = +1]", zlim = c(0, 1), col = col)
 | 
						|
matrixImage(`E(X |`(Y = +2), main = "E[X | Y = +2]", zlim = c(0, 1), col = col)
 | 
						|
{
 | 
						|
    restor.par <- par(mar = c(1.1, 2.1, 0, 2.1))
 | 
						|
    plot(0:1, 0:1, type = "n", xlab = "", ylab = "", axes = FALSE)
 | 
						|
    rasterImage(as.raster(matrix(col, nrow = 1)), 0, 0, 1, 1)
 | 
						|
    mtext("0", side = 2, las = 1, line = -3)
 | 
						|
    mtext("1", side = 4, las = 1, line = -3)
 | 
						|
    par(restor.par)
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
 | 
						|
 | 
						|
 | 
						|
sample.size <- 100
 | 
						|
c(X, F, y, sample.axis) %<-% sample.data(sample.size, betas, Omegas)
 | 
						|
# Design matrix containing vectorized X's
 | 
						|
vecX <- mat(X, sample.axis)
 | 
						|
 | 
						|
 | 
						|
fit.gmlm <- gmlm_ising(X, F)
 | 
						|
fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(dimF))
 | 
						|
fit.hopca <- HOPCA(X, npc = dimF, sample.axis = sample.axis)
 | 
						|
fit.tsir <- TSIR(X, y, dimF, sample.axis = sample.axis)
 | 
						|
fit.mgcca <- local({
 | 
						|
    X1 <- aperm(X, c(sample.axis, seq_along(dim(X))[-sample.axis]))
 | 
						|
    F1 <- aperm(F, c(sample.axis, seq_along(dim(X))[-sample.axis]))
 | 
						|
    mgcca(
 | 
						|
        list(X1, drop(F1)), # `drop` removes 1D axis
 | 
						|
        quiet = TRUE,
 | 
						|
        scheme = "factorial",
 | 
						|
        ncomp = rep(prod(dimF), 2)
 | 
						|
    )
 | 
						|
})
 | 
						|
fit.lpca <- logisticPCA(vecX, k = prod(dimF), m = m)
 | 
						|
fit.clpca <- convexLogisticPCA(vecX, k = prod(dimF), m = m)
 | 
						|
 | 
						|
 | 
						|
B.gmlm <- Reduce(kronecker, rev(fit.gmlm$betas))
 | 
						|
B.pca <- fit.pca$rotation
 | 
						|
B.hopca <- Reduce(`%x%`, rev(fit.hopca))
 | 
						|
B.tsir <- Reduce(`%x%`, rev(fit.tsir))
 | 
						|
B.mgcca <- fit.mgcca$astar[[1]]
 | 
						|
B.lpca <- fit.lpca$U
 | 
						|
B.clpca <- fit.clpca$U
 | 
						|
# B.lsvd <- ???
 | 
						|
 | 
						|
 | 
						|
 | 
						|
dist.subspace(B.true, B.gmlm, normalize = TRUE)
 | 
						|
dist.subspace(B.true, B.pca, normalize = TRUE)
 | 
						|
dist.subspace(B.true, B.hopca, normalize = TRUE)
 | 
						|
dist.subspace(B.true, B.tsir,  normalize = TRUE)
 | 
						|
dist.subspace(B.true, B.mgcca, normalize = TRUE)
 | 
						|
dist.subspace(B.true, B.lpca, normalize = TRUE)
 | 
						|
dist.subspace(B.true, B.clpca, normalize = TRUE)
 | 
						|
 | 
						|
 | 
						|
 | 
						|
################################################################################
 | 
						|
###                          End - Work In Progress                          ###
 | 
						|
################################################################################
 | 
						|
}
 |