# p <- 5
# A <- matrix(rnorm(p^2), p)
# mat.proj("TriDiag", p)(A)
# mat.proj("SymTriDiag", p)(A)
# A
# (AA <- mat.proj("PSD", p)(A))
# mat.proj("PSD", p)(AA)
# p <- 5
# A <- matrix(rnorm(p^2), p)
# projection <- function(T2) {
#     P <- pinv(T2) %*% T2
#     function(A) {
#         matrix(P %*% as.vector(A), nrow(A))
#     }
# }
# # All equal diagonal matrix, A = a * I_p
# T2 <- t(as.vector(diag(p)))
# proj <- projection(T2)
# print.table( diag(mean(diag(A)), nrow(A), ncol(A)) , zero.print = ".")
# print.table( matrix((pinv(T2) %*% T2) %*% as.vector(A), p) , zero.print = ".")
# print.table( proj(A) , zero.print = ".")
# # Diagonal matrix, A = diag(a_1, ..., a_p)
# T2 <- matrix(seq_len(p^2), p)
# T2 <- outer(diag(T2), as.vector(T2), `==`)
# storage.mode(T2) <- "double"
# proj <- projection(T2)
# print.table( T2 , zero.print = ".")
# print.table( diag(diag(A)) , zero.print = ".")
# print.table( proj(A) , zero.print = ".")
# # Tri-Diagonal Matrix
# T2 <- matrix(seq_len(p^2), p)
# T2 <- outer(T2[abs(row(A) - col(A)) <= 1], as.vector(T2), `==`)
# storage.mode(T2) <- "double"
# triDiag.mask <- (abs(row(A) - col(A)) <= 1)
# storage.mode(triDiag.mask) <- "double"
# print.table( T2 , zero.print = ".")
# print.table( triDiag.mask , zero.print = ".")
# print.table( A * triDiag.mask , zero.print = ".")
# print.table( matrix((pinv(T2) %*% T2) %*% as.vector(A), p) , zero.print = ".")
# # All equal main and off diagonals
# T2 <- Reduce(rbind, Map(function(i) {
#     as.double(row(A) == i + col(A))
# }, (1 - p):(p - 1)))
# print.table( T2 , zero.print = ".")
# print.table( matrix((pinv(T2) %*% T2) %*% as.vector(A), p) , zero.print = ".")
# # Symmetric all equal main and off diagonals
# T2 <- Reduce(rbind, Map(function(i) {
#     as.double(abs(row(A) - col(A)) == i)
# }, 0:(p - 1)))
# print.table( T2 , zero.print = ".")
# print.table( matrix((pinv(T2) %*% T2) %*% as.vector(A), p) , zero.print = ".")
# # Symmetric Matrix
# index <- matrix(seq_len(p^2), p)
# T2 <- Reduce(rbind, Map(function(i) {
#     e_i <- index == i
#     as.double(e_i | t(e_i))
# }, index[lower.tri(index, diag = TRUE)]))
# proj <- projection(T2)
# print.table( T2 , zero.print = ".")
# print.table( 0.5 * (A + t(A)) , zero.print = ".")
# print.table( proj(A) , zero.print = ".")
# proj(solve(A))
# solve(proj(A))
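# # For comparison, a minimal sketch of a PSD projection via eigenvalue
# # clamping (symmetrize, then zero out negative eigenvalues); the helper
# # name `proj.psd` is illustrative only, not part of the package:
# proj.psd <- function(A) {
#     eig <- eigen(0.5 * (A + t(A)), symmetric = TRUE)
#     # V %*% diag(pmax(d, 0)) %*% t(V), written without forming diag()
#     with(eig, vectors %*% (pmax(values, 0) * t(vectors)))
# }
# proj.psd(A)  # compare against mat.proj("PSD", p)(A) above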
#' Specialized version of GMLM for the tensor normal model
#'
#' The underlying algorithm is an iterative (block) coordinate descent method.
#'
#' @param X numeric array of observations, its `sample.axis` axis enumerating
#'   the samples
#' @param F numeric array with the same sample axis as `X`
#' @param sample.axis axis enumerating the observations (defaults to the last)
#' @param max.iter maximum number of (block) coordinate descent iterations
#' @param proj.betas list of per-mode projection functions applied to the
#'   `beta_j` estimates (`NULL` entries leave a mode unconstrained)
#' @param proj.Omegas list of per-mode projection functions applied to the
#'   `Omega_j` estimates (`NULL` entries leave a mode unconstrained)
#' @param logger optional callback invoked after initialization and after every
#'   iteration with the arguments `iter`, `betas`, `Omegas`, `resid` and `loss`
#' @param cond.threshold condition number threshold above which a mode-wise
#'   covariance estimate is regularized before inversion
#' @param eps relative change of the loss used as the convergence criterion
#'
#' @return list with components `eta1` (the mean of `X`), `betas` and `Omegas`
#'
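#' @examples
#' # Minimal synthetic sketch (dimensions chosen arbitrarily); `mlm` is the
#' # package's multi-linear multiplication, used the same way as in the
#' # debugging code at the end of this file.
#' p <- c(3, 4); q <- c(2, 2); n <- 500
#' betas <- Map(matrix, Map(rnorm, p * q), p)
#' F <- array(rnorm(n * prod(q)), dim = c(q, n))
#' X <- mlm(F, betas) + array(rnorm(n * prod(p)), dim = c(p, n))
#' fit <- gmlm_tensor_normal(X, F)
#'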
#' @export
gmlm_tensor_normal <- function(X, F, sample.axis = length(dim(X)),
    max.iter = 100L, proj.betas = NULL, proj.Omegas = NULL, logger = NULL,
    cond.threshold = 25, eps = 1e-6
) {
    # rearrange `X`, `F` such that the last axis enumerates observations
    if (!missing(sample.axis)) {
        axis.perm <- c(seq_along(dim(X))[-sample.axis], sample.axis)
        X <- aperm(X, axis.perm)
        F <- aperm(F, axis.perm)
        sample.axis <- length(dim(X))
    }

    # Get problem dimensions (observation dimensions)
    dimX <- head(dim(X), -1)
    dimF <- head(dim(F), -1)
    modes <- seq_along(dimX)
    # Ensure the Omega and beta projection arguments are lists
    # (`rep(NULL, n)` evaluates to `NULL`; `vector("list", n)` is needed to
    # get a list of `n` `NULL` entries so that `proj.Omegas[[j]]` is valid)
    if (!is.list(proj.Omegas)) {
        proj.Omegas <- vector("list", length(modes))
    }
    if (!is.list(proj.betas)) {
        proj.betas <- vector("list", length(modes))
    }
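    # (each projection entry, when given, is a function mapping a matrix to
    # its projection onto the desired manifold, for example
    # `function(A) diag(diag(A))` for a diagonal constraint as in the
    # debugging examples at the end of this file)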
    ### Phase 1: Computing initial values
    meanX <- rowMeans(X, dims = length(dimX))
    meanF <- rowMeans(F, dims = length(dimF))
    # center X and F
    X <- X - as.vector(meanX)
    F <- F - as.vector(meanF)

    # initialize the Sigma and Omega estimates as identity matrices
    Sigmas <- Map(diag, dimX)
    Omegas <- Map(diag, dimX)

    # Per mode covariance directions
    # Note: (the directions are transposed!)
    dirsX <- Map(function(Sigma) {
        SVD <- La.svd(Sigma, nu = 0)
        sqrt(SVD$d) * SVD$vt
    }, mcov(X, sample.axis, center = FALSE))
    dirsF <- Map(function(Sigma) {
        SVD <- La.svd(Sigma, nu = 0)
        sqrt(SVD$d) * SVD$vt
    }, mcov(F, sample.axis, center = FALSE))

    # initialize the betas by matching the leading mode-wise covariance
    # directions of `X` and `F`
    betas <- Map(function(dX, dF) {
        s <- min(ncol(dX), nrow(dF))
        crossprod(dX[1:s, , drop = FALSE], dF[1:s, , drop = FALSE])
    }, dirsX, dirsF)

    # Residuals
    R <- X - mlm(F, Map(`%*%`, Sigmas, betas))

    # Initial loss value, the negative log-likelihood (scaled, constants dropped)
    loss <- mean(R * mlm(R, Omegas)) - sum(log(mapply(det, Omegas)) / dimX)
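    # Note: `mean(R * mlm(R, Omegas))` is the Mahalanobis-type term
    # <R, R x_1 Omega_1 ... x_r Omega_r> averaged over all n * prod(dimX)
    # entries, and log det(Omega_r %x% ... %x% Omega_1) = prod(dimX) *
    # sum_j log(det(Omega_j)) / dimX[j]; hence `loss` is the negative
    # log-likelihood up to scaling and dropped additive constants.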
    # invoke the logger
    if (is.function(logger)) do.call(logger, list(
        iter = 0L, betas = betas, Omegas = Omegas,
        resid = R, loss = loss
    ))
    ### Phase 2: (Block) Coordinate Descent
    for (iter in seq_len(max.iter)) {
        # update every beta (in random order)
        for (j in sample.int(length(betas))) {
            FxB_j <- mlm(F, betas[-j], modes[-j])
            FxSB_j <- mlm(FxB_j, Sigmas[-j], modes[-j])
            betas[[j]] <- Omegas[[j]] %*% t(solve(
                mcrossprod(FxSB_j, FxB_j, j), mcrossprod(FxB_j, X, j)
            ))
            # project `beta_j` onto its manifold
            if (is.function(proj_j <- proj.betas[[j]])) {
                betas[[j]] <- proj_j(betas[[j]])
            }
        }

        # Residuals
        R <- X - mlm(F, Map(`%*%`, Sigmas, betas))

        # Covariance estimates (moment based, TODO: implement MLE estimate!)
        Sigmas <- mcov(R, sample.axis, center = FALSE)

        # Compute the `Omega_j`s, the j'th mode precision matrices, in
        # conjunction with regularization of the j'th mode covariance
        # estimate `Sigma_j`
        for (j in seq_along(Sigmas)) {
            # compute min and max eigenvalues
            min_max <- range(eigen(Sigmas[[j]], TRUE, TRUE)$values)
            # the condition is approximately `kappa(Sigmas[[j]]) > cond.threshold`
            if (min_max[2] > cond.threshold * min_max[1]) {
                Sigmas[[j]] <- Sigmas[[j]] + diag(0.2 * min_max[2], nrow(Sigmas[[j]]))
            }
            # compute the (unconstrained but regularized) `Omega_j` as the
            # inverse of the covariance estimate
            Omegas[[j]] <- solve(Sigmas[[j]])
            # project `Omega_j` onto its manifold
            if (is.function(proj_j <- proj.Omegas[[j]])) {
                Omegas[[j]] <- proj_j(Omegas[[j]])
                # recompute `Sigma_j` as the inverse of the projected `Omega_j`;
                # the order (project `Omega_j` first, then recompute `Sigma_j`)
                # is important
                Sigmas[[j]] <- solve(Omegas[[j]])
            }
        }

        # store last loss and compute the new value
        loss.last <- loss
        loss <- mean(R * mlm(R, Omegas)) - sum(log(mapply(det, Omegas)) / dimX)

        # invoke the logger
        if (is.function(logger)) do.call(logger, list(
            iter = iter, betas = betas, Omegas = Omegas, resid = R, loss = loss
        ))

        # check the break condition (relative change of the loss)
        if (abs(loss.last - loss) < eps * loss.last) {
            break
        }
    }

    list(eta1 = meanX, betas = betas, Omegas = Omegas)
}
# #### DEBUGGING
# library(tensorPredictors)
# # setup dimensions
# n <- 1e3
# p <- c(5, 7, 3)
# q <- c(2, 5, 3)
# max.iter <- 10L
# # create "true" GLM parameters
# eta1 <- array(rnorm(prod(p)), dim = p)
# betas <- Map(matrix, Map(rnorm, p * q), p)
# Omegas <- Map(function(p_j) {
#     solve(0.5^abs(outer(seq_len(p_j), seq_len(p_j), `-`)))
# }, p)
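# # (Note: 0.5^|i - j| is an AR(1) covariance; its inverse, i.e. the true
# #  Omega_j, is tridiagonal, which motivates the tridiagonal `proj.Omegas`
# #  constraint demonstrated below.)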
# true.params <- list(eta1 = eta1, betas = betas, Omegas = Omegas)
# # compute tensor normal parameters from GMLM parameters
# Sigmas <- Map(solve, Omegas)
# mu <- mlm(eta1, Sigmas)
# # sample some test data
# sample.axis <- length(p) + 1L
# F <- array(rnorm(n * prod(q)), dim = c(q, n))
# X <- mlm(F, Map(`%*%`, Sigmas, betas)) + rtensornorm(n, mu, Sigmas, sample.axis)
# # setup a logging callback
# format <- sprintf("iter: %%3d, %s, Omega: %%8.2f, resid: %%8.3f\n",
#     paste0("beta.", seq_along(betas), ": %.3f", collapse = ", ")
# )
# hist <- data.frame(
#     iter = seq(0, max.iter),
#     dist.B = numeric(max.iter + 1),
#     dist.Omega = numeric(max.iter + 1),
#     resid = numeric(max.iter + 1)
# )
# # (the `...` absorbs the `loss` argument also passed by `gmlm_tensor_normal`)
# logger <- function(iter, betas, Omegas, resid, ...) {
#     dist.B <- dist.kron.norm(true.params$betas, betas)
#     dist.Omega <- dist.kron.norm(true.params$Omegas, Omegas)
#     resid <- mean(resid^2)
#     hist[iter + 1, -1] <<- c(dist.B = dist.B, dist.Omega = dist.Omega, resid = resid)
#     cat(do.call(sprintf, c(
#         list(format, iter),
#         Map(dist.subspace, betas, true.params$betas),
#         dist.Omega, resid
#     )))
# }
# # run the model
# est.params <- gmlm_tensor_normal(X, F, max.iter = max.iter, logger = logger)
# # run model with Omegas in sub-manifolds of SPD matrices
# est.params <- gmlm_tensor_normal(X, F, max.iter = max.iter, logger = logger,
#     proj.Omegas = list(
#         NULL,
#         (function(nrow, ncol) {
#             triDiag.mask <- (abs(.row(c(nrow, ncol)) - .col(c(nrow, ncol))) <= 1)
#             storage.mode(triDiag.mask) <- "double"
#             function(A) { A * triDiag.mask }
#         })(p[2], p[2]),
#         function(A) diag(diag(A))
#     ))
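# # The `proj.betas` argument works analogously (it is not exercised above);
# # a hypothetical sketch constraining the second mode's `beta` to rank 1 by
# # SVD truncation (the closest rank-1 matrix by Eckart-Young):
# est.params <- gmlm_tensor_normal(X, F, max.iter = max.iter, logger = logger,
#     proj.betas = list(
#         NULL,
#         function(B) with(La.svd(B, 1L, 1L), d[1] * (u %*% vt)),
#         NULL
#     ))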
# # # Profile the fitting routine without logging
# # profvis::profvis({
# #     est.params <- gmlm_tensor_normal(X, F, max.iter = max.iter)
# # })
# with(hist, {
#     plot(range(iter), range(hist[, -1]), type = "n", log = "y")
#     lines(iter, resid, col = "red")
#     lines(iter, dist.B, col = "green")
#     lines(iter, dist.Omega, col = "blue")
#     legend("topright", legend = c("resid", "dist.B", "dist.Omega"),
#         col = c("red", "green", "blue"), lty = 1)
# })
# # par(mfrow = c(1, 2))
# # matrixImage(Reduce(kronecker, rev(est.params$Omegas)))
# # matrixImage(Reduce(kronecker, rev(true.params$Omegas)))
# # # matrixImage(Reduce(kronecker, rev(est.params$Omegas)) - Reduce(kronecker, rev(true.params$Omegas)))
# # par(mfrow = c(1, 2))
# # matrixImage(Reduce(kronecker, rev(true.params$betas)), main = "True", col = hcl.colors(48, palette = "Blue-Red 3"))
# # matrixImage(Reduce(kronecker, rev(est.params$betas)), main = "Est", col = hcl.colors(48, palette = "Blue-Red 3"))
# # # unlist(Map(kappa, mcov(X)))
# # # unlist(Map(kappa, Omegas))
# # # unlist(Map(kappa, Sigmas))
# # # max(unlist(Map(function(C) max(eigen(C)$values), mcov(X))))
# # # min(unlist(Map(function(C) min(eigen(C)$values), mcov(X))))
# # # Map(function(C) eigen(C)$values, mcov(X))
# # # kappa(Reduce(kronecker, mcov(X)))
# # kappa1.fun <- function(Omegas) Map(kappa, Omegas)
# # kappa2.fun <- function(Omegas) Map(kappa, Omegas, exact = TRUE)
# # eigen1.fun <- function(Omegas) {
# #     Map(function(Omega) {
# #         min_max <- range(eigen(Omega)$values)
# #         min_max[2] / min_max[1]
# #     }, Omegas)
# # }
# # eigen2.fun <- function(Omegas) {
# #     Map(function(Omega) {
# #         min_max <- range(eigen(Omega, TRUE, TRUE)$values)
# #         min_max[2] / min_max[1]
# #     }, Omegas)
# # }
# # microbenchmark::microbenchmark(
# #     kappa1.fun(Omegas),
# #     kappa2.fun(Omegas),
# #     eigen1.fun(Omegas),
# #     eigen2.fun(Omegas)
# # )