### tensor_predictors/sim/normal_2.R

library(tensorPredictors)
set.seed(271828183, "Mersenne-Twister", "Inversion", "Rejection")
### simulation configuration
reps <- 100 # number of simulation replications
n <- 100 # (training) sample size `n`
N <- 2000 # validation set size
p <- c(2, 3, 5) # predictor mode dimensions
q <- c(1, 2, 3) # dimensions of the response functions F(y)
# initial consistency checks
stopifnot(exprs = {
    length(p) == length(q)
})
# setup model parameters
alphas <- Map(matrix, Map(rnorm, p * q), p) # reduction matrices
Omegas <- Map(function(pj) 0.5^abs(outer(1:pj, 1:pj, `-`)), p) # mode-wise scatter (precision) matrices, AR(1)-type
eta1 <- 0 # intercept
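# Sanity-check sketch (not in the original script): the AR(1)-type Omegas
# must be symmetric positive definite for `solve()` and the normal sampler
# below to be valid; `chol()` errors out otherwise. Uses no RNG, so the
# seeded results below are unaffected.
stopifnot(all(mapply(isSymmetric, Omegas)))
invisible(Map(chol, Omegas))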
# data sampling routine
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = length(alphas) + 1L) {
    r <- length(alphas) # tensor order
    # generate response (sample axis is last axis)
    y <- sample.int(prod(q), n, replace = TRUE) # uniform samples
    Fy <- array(outer(seq_len(prod(q)), y, `==`), dim = c(q, n)) # one-hot encoding
    Fy <- Fy - c(rowMeans(Fy, dims = r)) # center the response functions
    # sample predictors as X | Y = y (sample axis is last axis)
    Deltas <- Map(solve, Omegas) # normal covariances
    mu_y <- mlm(mlm(Fy, alphas) + c(eta1), Deltas) # conditional mean
    X <- mu_y + rtensornorm(n, 0, Deltas, r + 1L) # predictors X
    # permute axes to place the sample axis at the requested position
    if (sample.axis != r + 1L) {
        perm <- integer(r + 1L)
        perm[sample.axis] <- r + 1L
        perm[-sample.axis] <- seq_len(r)
        X <- aperm(X, perm)
        Fy <- aperm(Fy, perm)
    }
    list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
}
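# Usage sketch (illustration only, kept commented out so the RNG state and
# the seeded results below stay unchanged); `N` from the configuration is
# the intended validation set size but is not consumed in this excerpt:
# val <- sample.data(N, eta1, alphas, Omegas)
# str(val$X) # predictor array of dimensions c(p, N), i.e. 2 x 3 x 5 x 2000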
### sample (training) data
# destructuring (multi-)assignment of the returned list into variables
c(X, Fy, y = y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
### Fit data using GMLM with logging
# logger tracking the iterative progress of the GMLM estimation
# log <- data.frame()
log.likelihood <- tensorPredictors:::make.gmlm.family("normal")$log.likelihood
B.true <- Reduce(`%x%`, rev(alphas)) # Kronecker product of reductions, reversed to match the vectorization order
logger <- function(iter, eta1.est, alphas.est, Omegas.est) {
    B.est <- Reduce(`%x%`, rev(alphas.est))
    err.alphas <- mapply(dist.subspace, alphas, alphas.est, MoreArgs = list(normalize = TRUE))
    err.Omegas <- mapply(norm, Map(`-`, Omegas, Omegas.est), MoreArgs = list(type = "F"))
    # move the cursor up 9 lines to overwrite the previous status block
    if (iter > 1) { cat("\033[9A") }
    cat(sprintf("\n\033[2mIter: loss - dist\n\033[0m%4d: %8.3f - %8.3f",
            iter,
            log.likelihood(X, Fy, eta1.est, alphas.est, Omegas.est),
            dist.subspace(B.true, B.est, normalize = TRUE)
        ),
        "\033[2mMSE eta1\033[0m",
        mean((eta1 - eta1.est)^2),
        "\033[2msubspace distances of alphas\033[0m",
        do.call(paste, Map(sprintf, err.alphas, MoreArgs = list(fmt = "%8.3f"))),
        "\033[2mFrob. norm of Omega differences\033[0m",
        do.call(paste, Map(sprintf, err.Omegas, MoreArgs = list(fmt = "%8.3f"))),
        sep = "\n "
    )
}
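# Alternative logger sketch (an assumption, motivated by the commented-out
# `log <- data.frame()` above): accumulate per-iteration metrics in a data
# frame instead of printing, e.g. for later plotting. Kept commented out so
# it does not replace the console logger defined above.
# log <- data.frame()
# logger <- function(iter, eta1.est, alphas.est, Omegas.est) {
#     log <<- rbind(log, data.frame(
#         iter = iter,
#         loss = log.likelihood(X, Fy, eta1.est, alphas.est, Omegas.est),
#         dist = dist.subspace(B.true, Reduce(`%x%`, rev(alphas.est)), normalize = TRUE)
#     ))
# }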
# now call the GMLM fitting routine with performance profiling
system.time( # profvis::profvis(
    fit.gmlm <- GMLM.default(
        X, Fy, sample.axis = sample.axis, max.iter = 10000L, logger = logger
    )
)
# Iter: loss - dist
# 7190: 50.583 - 0.057
# MSE eta1
# 0.02694658
# subspace distances of alphas
# 0.043 0.035 0.034
# Frob. norm of Omega differences
# 0.815 1.777 12.756
#    user  system elapsed
# 342.279 555.630 183.653
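# Post-fit evaluation sketch (an assumption: the returned `fit.gmlm` exposes
# the estimates under names matching the logger arguments, e.g. `$alphas`):
# B.est <- Reduce(`%x%`, rev(fit.gmlm$alphas))
# dist.subspace(B.true, B.est, normalize = TRUE) # final subspace distance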