358 lines
11 KiB
R
358 lines
11 KiB
R
|
|
# p <- 5
|
|
# A <- matrix(rnorm(p^2), p)
|
|
|
|
# mat.proj("TriDiag", p)(A)
|
|
# mat.proj("SymTriDiag", p)(A)
|
|
|
|
# A
|
|
# (AA <- mat.proj("PSD", p)(A))
|
|
# mat.proj("PSD", p)(AA)
|
|
|
|
|
|
# p <- 5
|
|
# A <- matrix(rnorm(p^2), p)
|
|
|
|
# projection <- function(T2) {
|
|
# P <- pinv(T2) %*% T2
|
|
# function(A) {
|
|
# matrix(P %*% as.vector(A), nrow(A))
|
|
# }
|
|
# }
|
|
|
|
|
|
# # All equal diagonal matrix, A = a * I_p
|
|
# T2 <- t(as.vector(diag(p)))
|
|
# proj <- projection(T2)
|
|
|
|
# print.table( diag(mean(diag(A)), nrow(A), ncol(A)) , zero.print = ".")
|
|
# print.table( matrix((pinv(T2) %*% T2) %*% as.vector(A), p) , zero.print = ".")
|
|
# print.table( proj(A) , zero.print = ".")
|
|
|
|
|
|
# # Diagonal matrix, A = diag(a_1, ..., a_p)
|
|
# T2 <- matrix(seq_len(p^2), p)
|
|
# T2 <- outer(diag(T2), as.vector(T2), `==`)
|
|
# storage.mode(T2) <- "double"
|
|
# proj <- projection(T2)
|
|
|
|
# print.table( T2 , zero.print = ".")
|
|
|
|
# print.table( diag(diag(A)) , zero.print = ".")
|
|
# print.table( proj(A) , zero.print = ".")
|
|
|
|
|
|
# # Tri-Diagonal Matrix
|
|
# T2 <- matrix(seq_len(p^2), p)
|
|
# T2 <- outer(T2[abs(row(A) - col(A)) <= 1], as.vector(T2), `==`)
|
|
# storage.mode(T2) <- "double"
|
|
|
|
# triDiag.mask <- (abs(row(A) - col(A)) <= 1)
|
|
# storage.mode(triDiag.mask) <- "double"
|
|
|
|
# print.table( T2 , zero.print = ".")
|
|
# print.table( triDiag.mask , zero.print = ".")
|
|
|
|
# print.table( A * triDiag.mask , zero.print = ".")
|
|
# print.table( matrix((pinv(T2) %*% T2) %*% as.vector(A), p) , zero.print = ".")
|
|
|
|
|
|
|
|
# # All equal main and off diagonals
|
|
# T2 <- Reduce(rbind, Map(function(i) {
|
|
# as.double(row(A) == i + col(A))
|
|
# }, (1 - p):(p - 1)))
|
|
|
|
# print.table( T2 , zero.print = ".")
|
|
|
|
# print.table( matrix((pinv(T2) %*% T2) %*% as.vector(A), p) , zero.print = ".")
|
|
|
|
|
|
# # Symmetric all equal main and off diagonals
|
|
# T2 <- Reduce(rbind, Map(function(i) {
|
|
# as.double(abs(row(A) - col(A)) == i)
|
|
# }, 0:(p - 1)))
|
|
|
|
# print.table( T2 , zero.print = ".")
|
|
|
|
# print.table( matrix((pinv(T2) %*% T2) %*% as.vector(A), p) , zero.print = ".")
|
|
|
|
|
|
# # Symetric Matrix
|
|
# index <- matrix(seq_len(p^2), p)
|
|
# T2 <- Reduce(rbind, Map(function(i) {
|
|
# e_i <- index == i
|
|
# as.double(e_i | t(e_i))
|
|
# }, index[lower.tri(index, diag = TRUE)]))
|
|
# proj <- projection(T2)
|
|
|
|
# print.table( T2 , zero.print = ".")
|
|
|
|
# print.table( 0.5 * (A + t(A)) , zero.print = ".")
|
|
# print.table( proj(A) , zero.print = ".")
|
|
|
|
# proj(solve(A))
|
|
# solve(proj(A))
|
|
|
|
|
|
|
|
|
|
#' Specialized version of GMLM for the tensor normal model
|
|
#'
|
|
#' The underlying algorithm is an ``iterative (block) coordinate descent'' method
|
|
#'
|
|
#' @export
|
|
gmlm_tensor_normal <- function(X, F, sample.axis = length(dim(X)),
|
|
max.iter = 100L, proj.betas = NULL, proj.Omegas = NULL, logger = NULL, # TODO: proj.betas NOT USED!!!
|
|
cond.threshold = 25, eps = 1e-6
|
|
) {
|
|
# rearrange `X`, `F` such that the last axis enumerates observations
|
|
if (!missing(sample.axis)) {
|
|
axis.perm <- c(seq_along(dim(X))[-sample.axis], sample.axis)
|
|
X <- aperm(X, axis.perm)
|
|
F <- aperm(F, axis.perm)
|
|
sample.axis <- length(dim(X))
|
|
}
|
|
|
|
# Get problem dimensions (observation dimensions)
|
|
dimX <- head(dim(X), -1)
|
|
dimF <- head(dim(F), -1)
|
|
modes <- seq_along(dimX)
|
|
|
|
# Ensure the Omega and beta projections lists are lists
|
|
if (!is.list(proj.Omegas)) {
|
|
proj.Omegas <- rep(NULL, length(modes))
|
|
}
|
|
if (!is.list(proj.betas)) {
|
|
proj.betas <- rep(NULL, length(modes))
|
|
}
|
|
|
|
|
|
### Phase 1: Computing initial values
|
|
meanX <- rowMeans(X, dims = length(dimX))
|
|
meanF <- rowMeans(F, dims = length(dimF))
|
|
|
|
# center X and F
|
|
X <- X - as.vector(meanX)
|
|
F <- F - as.vector(meanF)
|
|
|
|
# initialize Omega estimates as mode-wise, unconditional covariance estimates
|
|
Sigmas <- Map(diag, dimX)
|
|
Omegas <- Map(diag, dimX)
|
|
|
|
# Per mode covariance directions
|
|
# Note: (the directions are transposed!)
|
|
dirsX <- Map(function(Sigma) {
|
|
SVD <- La.svd(Sigma, nu = 0)
|
|
sqrt(SVD$d) * SVD$vt
|
|
}, mcov(X, sample.axis, center = FALSE))
|
|
dirsF <- Map(function(Sigma) {
|
|
SVD <- La.svd(Sigma, nu = 0)
|
|
sqrt(SVD$d) * SVD$vt
|
|
}, mcov(F, sample.axis, center = FALSE))
|
|
|
|
# initialization of betas ``covariance direction mappings``
|
|
betas <- Map(function(dX, dF) {
|
|
s <- min(ncol(dX), nrow(dF))
|
|
crossprod(dX[1:s, , drop = FALSE], dF[1:s, , drop = FALSE])
|
|
}, dirsX, dirsF)
|
|
|
|
# Residuals
|
|
R <- X - mlm(F, Map(`%*%`, Sigmas, betas))
|
|
|
|
# Initial value of the log-likelihood (scaled and constants dropped)
|
|
loss <- mean(R * mlm(R, Omegas)) - sum(log(mapply(det, Omegas)) / dimX)
|
|
|
|
# invoke the logger
|
|
if (is.function(logger)) do.call(logger, list(
|
|
iter = 0L, betas = betas, Omegas = Omegas,
|
|
resid = R, loss = loss
|
|
))
|
|
|
|
|
|
### Phase 2: (Block) Coordinate Descent
|
|
for (iter in seq_len(max.iter)) {
|
|
|
|
# update every beta (in random order)
|
|
for (j in sample.int(length(betas))) {
|
|
FxB_j <- mlm(F, betas[-j], modes[-j])
|
|
FxSB_j <- mlm(FxB_j, Sigmas[-j], modes[-j])
|
|
betas[[j]] <- Omegas[[j]] %*% t(solve(mcrossprod(FxSB_j, FxB_j, j), mcrossprod(FxB_j, X, j)))
|
|
# Project `betas` onto their manifold
|
|
if (is.function(proj_j <- proj.betas[[j]])) {
|
|
betas[[j]] <- proj_j(betas[[j]])
|
|
}
|
|
}
|
|
|
|
# Residuals
|
|
R <- X - mlm(F, Map(`%*%`, Sigmas, betas))
|
|
|
|
# Covariance Estimates (moment based, TODO: implement MLE estimate!)
|
|
Sigmas <- mcov(R, sample.axis, center = FALSE)
|
|
|
|
# Computing `Omega_j`s, the j'th mode presition matrices, in conjunction
|
|
# with regularization of the j'th mode covariance estimate `Sigma_j`
|
|
for (j in seq_along(Sigmas)) {
|
|
# Compute min and max eigen values
|
|
min_max <- range(eigen(Sigmas[[j]], TRUE, TRUE)$values)
|
|
# The condition is approximately `kappa(Sigmas[[j]]) > cond.threshold`
|
|
if (min_max[2] > cond.threshold * min_max[1]) {
|
|
Sigmas[[j]] <- Sigmas[[j]] + diag(0.2 * min_max[2], nrow(Sigmas[[j]]))
|
|
}
|
|
# Compute (unconstraint but regularized) Omega_j as covariance inverse
|
|
Omegas[[j]] <- solve(Sigmas[[j]])
|
|
# Project Omega_j to the Omega_j's manifold
|
|
if (is.function(proj_j <- proj.Omegas[[j]])) {
|
|
Omegas[[j]] <- proj_j(Omegas[[j]])
|
|
# Reverse computation of `Sigma_j` as inverse of `Omega_j`
|
|
# Order of projecting Omega_j and then recomputing Sigma_j is importent
|
|
Sigmas[[j]] <- solve(Omegas[[j]])
|
|
}
|
|
}
|
|
|
|
# store last loss and compute new value
|
|
loss.last <- loss
|
|
loss <- mean(R * mlm(R, Omegas)) - sum(log(mapply(det, Omegas)) / dimX)
|
|
|
|
# invoke the logger
|
|
if (is.function(logger)) do.call(logger, list(
|
|
iter = iter, betas = betas, Omegas = Omegas, resid = R, loss = loss
|
|
))
|
|
|
|
# check the break consition
|
|
if (abs(loss.last - loss) < eps * loss.last) {
|
|
break
|
|
}
|
|
}
|
|
|
|
|
|
list(eta1 = meanX, betas = betas, Omegas = Omegas)
|
|
}
|
|
|
|
|
|
|
|
# #### DEBUGGING
|
|
# library(tensorPredictors)
|
|
|
|
# # setup dimensions
|
|
# n <- 1e3
|
|
# p <- c(5, 7, 3)
|
|
# q <- c(2, 5, 3)
|
|
|
|
# max.iter <- 10L
|
|
|
|
# # create "true" GLM parameters
|
|
# eta1 <- array(rnorm(prod(p)), dim = p)
|
|
# betas <- Map(matrix, Map(rnorm, p * q), p)
|
|
# Omegas <- Map(function(p_j) {
|
|
# solve(0.5^abs(outer(seq_len(p_j), seq_len(p_j), `-`)))
|
|
# }, p)
|
|
# true.params <- list(eta1 = eta1, betas = betas, Omegas = Omegas)
|
|
|
|
# # compute tensor normal parameters from GMLM parameters
|
|
# Sigmas <- Map(solve, Omegas)
|
|
# mu <- mlm(eta1, Sigmas)
|
|
|
|
# # sample some test data
|
|
# sample.axis <- length(p) + 1L
|
|
# F <- array(rnorm(n * prod(q)), dim = c(q, n))
|
|
# X <- mlm(F, Map(`%*%`, Sigmas, betas)) + rtensornorm(n, mu, Sigmas, sample.axis)
|
|
|
|
# # setup a logging callback
|
|
# format <- sprintf("iter: %%3d, %s, Omega: %%8.2f, resid: %%8.3f\n",
|
|
# paste0("beta.", seq_along(betas), ": %.3f", collapse = ", ")
|
|
# )
|
|
# hist <- data.frame(
|
|
# iter = seq(0, max.iter),
|
|
# dist.B = numeric(max.iter + 1),
|
|
# dist.Omega = numeric(max.iter + 1),
|
|
# resid = numeric(max.iter + 1)
|
|
# )
|
|
# logger <- function(iter, betas, Omegas, resid) {
|
|
# dist.B <- dist.kron.norm(true.params$betas, betas)
|
|
# dist.Omega <- dist.kron.norm(true.params$Omegas, Omegas)
|
|
# resid <- mean(resid^2)
|
|
|
|
# hist[iter + 1, -1] <<- c(dist.B = dist.B, dist.Omega = dist.Omega, resid = resid)
|
|
# cat(do.call(sprintf, c(
|
|
# list(format, iter),
|
|
# Map(dist.subspace, betas, true.params$betas),
|
|
# dist.Omega, resid
|
|
# )))
|
|
# }
|
|
|
|
# # run the model
|
|
# est.params <- gmlm_tensor_normal(X, F, max.iter = max.iter, logger = logger)
|
|
|
|
# # run model with Omegas in sub-manifolds of SPD matrices
|
|
# est.params <- gmlm_tensor_normal(X, F, max.iter = max.iter, logger = logger,
|
|
# proj.Omegas = list(
|
|
# NULL,
|
|
# (function(nrow, ncol) {
|
|
# triDiag.mask <- (abs(.row(c(nrow, ncol)) - .col(c(nrow, ncol))) <= 1)
|
|
# storage.mode(triDiag.mask) <- "double"
|
|
# function(A) { A * triDiag.mask }
|
|
# })(p[2], p[2]),
|
|
# function(A) diag(diag(A))
|
|
# ))
|
|
|
|
|
|
|
|
# # # Profile the fitting routine without logging
|
|
# # profvis::profvis({
|
|
# # est.params <- gmlm_tensor_normal(X, F, max.iter = max.iter)
|
|
# # })
|
|
|
|
# with(hist, {
|
|
# plot(range(iter), range(hist[, -1]), type = "n", log = "y")
|
|
# lines(iter, resid, col = "red")
|
|
# lines(iter, dist.B, col = "green")
|
|
# lines(iter, dist.Omega, col = "blue")
|
|
# legend("topright", legend = c("resid", "dist.B", "dist.Omega"), col = c("red", "green", "blue"), lty = 1)
|
|
# })
|
|
|
|
|
|
# # par(mfrow = c(1, 2))
|
|
|
|
# # matrixImage(Reduce(kronecker, rev(est.params$Omegas)))
|
|
# # matrixImage(Reduce(kronecker, rev(true.params$Omegas)))
|
|
# # # matrixImage(Reduce(kronecker, rev(est.params$Omegas)) - Reduce(kronecker, rev(true.params$Omegas)))
|
|
|
|
# # par(mfrow = c(1, 2))
|
|
|
|
# # matrixImage(Reduce(kronecker, rev(true.params$betas)), main = "True", col = hcl.colors(48, palette = "Blue-Red 3"))
|
|
# # matrixImage(Reduce(kronecker, rev(est.params$betas)), main = "Est", col = hcl.colors(48, palette = "Blue-Red 3"))
|
|
|
|
|
|
# # # unlist(Map(kappa, mcov(X)))
|
|
# # # unlist(Map(kappa, Omegas))
|
|
# # # unlist(Map(kappa, Sigmas))
|
|
|
|
|
|
# # # max(unlist(Map(function(C) max(eigen(C)$values), mcov(X))))
|
|
# # # min(unlist(Map(function(C) min(eigen(C)$values), mcov(X))))
|
|
# # # Map(function(C) eigen(C)$values, mcov(X))
|
|
|
|
# # # kappa(Reduce(kronecker, mcov(X)))
|
|
|
|
# # kappa1.fun <- function(Omegas) Map(kappa, Omegas)
|
|
# # kappa2.fun <- function(Omegas) Map(kappa, Omegas, exact = TRUE)
|
|
# # eigen1.fun <- function(Omegas) {
|
|
# # Map(function(Omega) {
|
|
# # min_max <- range(eigen(Omega)$values)
|
|
# # min_max[2] / min_max[1]
|
|
# # }, Omegas)
|
|
# # }
|
|
# # eigen2.fun <- function(Omegas) {
|
|
# # Map(function(Omega) {
|
|
# # min_max <- range(eigen(Omega, TRUE, TRUE)$values)
|
|
# # min_max[2] / min_max[1]
|
|
# # }, Omegas)
|
|
# # }
|
|
# # microbenchmark::microbenchmark(
|
|
# # kappa1.fun(Omegas),
|
|
# # kappa2.fun(Omegas),
|
|
# # eigen1.fun(Omegas),
|
|
# # eigen2.fun(Omegas)
|
|
# # )
|