# p <- 5
# A <- matrix(rnorm(p^2), p)
# mat.proj("TriDiag", p)(A)
# mat.proj("SymTriDiag", p)(A)
# A
# (AA <- mat.proj("PSD", p)(A))
# mat.proj("PSD", p)(AA)
# p <- 5
# A <- matrix(rnorm(p^2), p)
# projection <- function(T2) {
#     P <- pinv(T2) %*% T2
#     function(A) {
#         matrix(P %*% as.vector(A), nrow(A))
#     }
# }
# # All equal diagonal matrix, A = a * I_p
# T2 <- t(as.vector(diag(p)))
# proj <- projection(T2)
# print.table( diag(mean(diag(A)), nrow(A), ncol(A)) , zero.print = ".")
# print.table( matrix((pinv(T2) %*% T2) %*% as.vector(A), p) , zero.print = ".")
# print.table( proj(A) , zero.print = ".")
# # Diagonal matrix, A = diag(a_1, ..., a_p)
# T2 <- matrix(seq_len(p^2), p)
# T2 <- outer(diag(T2), as.vector(T2), `==`)
# storage.mode(T2) <- "double"
# proj <- projection(T2)
# print.table( T2 , zero.print = ".")
# print.table( diag(diag(A)) , zero.print = ".")
# print.table( proj(A) , zero.print = ".")
# # Tri-Diagonal Matrix
# T2 <- matrix(seq_len(p^2), p)
# T2 <- outer(T2[abs(row(A) - col(A)) <= 1], as.vector(T2), `==`)
# storage.mode(T2) <- "double"
# triDiag.mask <- (abs(row(A) - col(A)) <= 1)
# storage.mode(triDiag.mask) <- "double"
# print.table( T2 , zero.print = ".")
# print.table( triDiag.mask , zero.print = ".")
# print.table( A * triDiag.mask , zero.print = ".")
# print.table( matrix((pinv(T2) %*% T2) %*% as.vector(A), p) , zero.print = ".")
# # All equal main and off diagonals
# T2 <- Reduce(rbind, Map(function(i) {
#     as.double(row(A) == i + col(A))
# }, (1 - p):(p - 1)))
# print.table( T2 , zero.print = ".")
# print.table( matrix((pinv(T2) %*% T2) %*% as.vector(A), p) , zero.print = ".")
# # Symmetric all equal main and off diagonals
# T2 <- Reduce(rbind, Map(function(i) {
#     as.double(abs(row(A) - col(A)) == i)
# }, 0:(p - 1)))
# print.table( T2 , zero.print = ".")
# print.table( matrix((pinv(T2) %*% T2) %*% as.vector(A), p) , zero.print = ".")
# # Symmetric Matrix
# index <- matrix(seq_len(p^2), p)
# T2 <- Reduce(rbind, Map(function(i) {
#     e_i <- index == i
#     as.double(e_i | t(e_i))
# }, index[lower.tri(index, diag = TRUE)]))
# proj <- projection(T2)
# print.table( T2 , zero.print = ".")
# print.table( 0.5 * (A + t(A)) , zero.print = ".")
# print.table( proj(A) , zero.print = ".")
# proj(solve(A))
# solve(proj(A))
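# # For comparison, a minimal sketch of a PSD projection via eigenvalue
# # clamping (symmetrize, then zero out negative eigenvalues); the helper
# # name `proj.psd` is illustrative only, not part of the package:
# proj.psd <- function(A) {
#     eig <- eigen(0.5 * (A + t(A)), symmetric = TRUE)
#     # V %*% diag(pmax(d, 0)) %*% t(V), written without forming diag()
#     with(eig, vectors %*% (pmax(values, 0) * t(vectors)))
# }
# proj.psd(A)  # compare against mat.proj("PSD", p)(A) above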
#' Specialized version of GMLM for the tensor normal model
#'
#' The underlying algorithm is an iterative (block) coordinate descent method.
#'
#' @param X numeric array of observations, its `sample.axis` axis enumerating
#'   the samples
#' @param F numeric array with the same sample axis as `X`
#' @param sample.axis axis enumerating the observations (defaults to the last)
#' @param max.iter maximum number of (block) coordinate descent iterations
#' @param proj.betas list of per-mode projection functions applied to the
#'   `beta_j` estimates (`NULL` entries leave a mode unconstrained)
#' @param proj.Omegas list of per-mode projection functions applied to the
#'   `Omega_j` estimates (`NULL` entries leave a mode unconstrained)
#' @param logger optional callback invoked after initialization and after every
#'   iteration with the arguments `iter`, `betas`, `Omegas`, `resid` and `loss`
#' @param cond.threshold condition number threshold above which a mode-wise
#'   covariance estimate is regularized before inversion
#' @param eps relative change of the loss used as the convergence criterion
#'
#' @return list with components `eta1` (the mean of `X`), `betas` and `Omegas`
#'
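#' @examples
#' # Minimal synthetic sketch (dimensions chosen arbitrarily); `mlm` is the
#' # package's multi-linear multiplication, used the same way as in the
#' # debugging code at the end of this file.
#' p <- c(3, 4); q <- c(2, 2); n <- 500
#' betas <- Map(matrix, Map(rnorm, p * q), p)
#' F <- array(rnorm(n * prod(q)), dim = c(q, n))
#' X <- mlm(F, betas) + array(rnorm(n * prod(p)), dim = c(p, n))
#' fit <- gmlm_tensor_normal(X, F)
#'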
#' @export
gmlm_tensor_normal <- function(X, F, sample.axis = length(dim(X)),
    max.iter = 100L, proj.betas = NULL, proj.Omegas = NULL, logger = NULL,
    cond.threshold = 25, eps = 1e-6
) {
    # rearrange `X`, `F` such that the last axis enumerates observations
    if (!missing(sample.axis)) {
        axis.perm <- c(seq_along(dim(X))[-sample.axis], sample.axis)
        X <- aperm(X, axis.perm)
        F <- aperm(F, axis.perm)
        sample.axis <- length(dim(X))
    }

    # Get problem dimensions (observation dimensions)
    dimX <- head(dim(X), -1)
    dimF <- head(dim(F), -1)
    modes <- seq_along(dimX)
    # Ensure the Omega and beta projection arguments are lists
    # (`rep(NULL, n)` evaluates to `NULL`; `vector("list", n)` is needed to
    # get a list of `n` `NULL` entries so that `proj.Omegas[[j]]` is valid)
    if (!is.list(proj.Omegas)) {
        proj.Omegas <- vector("list", length(modes))
    }
    if (!is.list(proj.betas)) {
        proj.betas <- vector("list", length(modes))
    }
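    # (each projection entry, when given, is a function mapping a matrix to
    # its projection onto the desired manifold, for example
    # `function(A) diag(diag(A))` for a diagonal constraint as in the
    # debugging examples at the end of this file)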
    ### Phase 1: Computing initial values
    meanX <- rowMeans(X, dims = length(dimX))
    meanF <- rowMeans(F, dims = length(dimF))
    # center X and F
    X <- X - as.vector(meanX)
    F <- F - as.vector(meanF)

    # initialize the Sigma and Omega estimates as identity matrices
    Sigmas <- Map(diag, dimX)
    Omegas <- Map(diag, dimX)

    # Per mode covariance directions
    # Note: (the directions are transposed!)
    dirsX <- Map(function(Sigma) {
        SVD <- La.svd(Sigma, nu = 0)
        sqrt(SVD$d) * SVD$vt
    }, mcov(X, sample.axis, center = FALSE))
    dirsF <- Map(function(Sigma) {
        SVD <- La.svd(Sigma, nu = 0)
        sqrt(SVD$d) * SVD$vt
    }, mcov(F, sample.axis, center = FALSE))

    # initialize the betas by matching the leading mode-wise covariance
    # directions of `X` and `F`
    betas <- Map(function(dX, dF) {
        s <- min(ncol(dX), nrow(dF))
        crossprod(dX[1:s, , drop = FALSE], dF[1:s, , drop = FALSE])
    }, dirsX, dirsF)

    # Residuals
    R <- X - mlm(F, Map(`%*%`, Sigmas, betas))

    # Initial loss value, the negative log-likelihood (scaled, constants dropped)
    loss <- mean(R * mlm(R, Omegas)) - sum(log(mapply(det, Omegas)) / dimX)
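    # Note: `mean(R * mlm(R, Omegas))` is the Mahalanobis-type term
    # <R, R x_1 Omega_1 ... x_r Omega_r> averaged over all n * prod(dimX)
    # entries, and log det(Omega_r %x% ... %x% Omega_1) = prod(dimX) *
    # sum_j log(det(Omega_j)) / dimX[j]; hence `loss` is the negative
    # log-likelihood up to scaling and dropped additive constants.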
    # invoke the logger
    if (is.function(logger)) do.call(logger, list(
        iter = 0L, betas = betas, Omegas = Omegas,
        resid = R, loss = loss
    ))
    ### Phase 2: (Block) Coordinate Descent
    for (iter in seq_len(max.iter)) {
        # update every beta (in random order)
        for (j in sample.int(length(betas))) {
            FxB_j <- mlm(F, betas[-j], modes[-j])
            FxSB_j <- mlm(FxB_j, Sigmas[-j], modes[-j])
            betas[[j]] <- Omegas[[j]] %*% t(solve(
                mcrossprod(FxSB_j, FxB_j, j), mcrossprod(FxB_j, X, j)
            ))
            # project `beta_j` onto its manifold
            if (is.function(proj_j <- proj.betas[[j]])) {
                betas[[j]] <- proj_j(betas[[j]])
            }
        }

        # Residuals
        R <- X - mlm(F, Map(`%*%`, Sigmas, betas))

        # Covariance estimates (moment based, TODO: implement MLE estimate!)
        Sigmas <- mcov(R, sample.axis, center = FALSE)

        # Compute the `Omega_j`s, the j'th mode precision matrices, in
        # conjunction with regularization of the j'th mode covariance
        # estimate `Sigma_j`
        for (j in seq_along(Sigmas)) {
            # compute min and max eigenvalues
            min_max <- range(eigen(Sigmas[[j]], TRUE, TRUE)$values)
            # the condition is approximately `kappa(Sigmas[[j]]) > cond.threshold`
            if (min_max[2] > cond.threshold * min_max[1]) {
                Sigmas[[j]] <- Sigmas[[j]] + diag(0.2 * min_max[2], nrow(Sigmas[[j]]))
            }
            # compute the (unconstrained but regularized) `Omega_j` as the
            # inverse of the covariance estimate
            Omegas[[j]] <- solve(Sigmas[[j]])
            # project `Omega_j` onto its manifold
            if (is.function(proj_j <- proj.Omegas[[j]])) {
                Omegas[[j]] <- proj_j(Omegas[[j]])
                # recompute `Sigma_j` as the inverse of the projected `Omega_j`;
                # the order (project `Omega_j` first, then recompute `Sigma_j`)
                # is important
                Sigmas[[j]] <- solve(Omegas[[j]])
            }
        }

        # store last loss and compute the new value
        loss.last <- loss
        loss <- mean(R * mlm(R, Omegas)) - sum(log(mapply(det, Omegas)) / dimX)

        # invoke the logger
        if (is.function(logger)) do.call(logger, list(
            iter = iter, betas = betas, Omegas = Omegas, resid = R, loss = loss
        ))

        # check the break condition (relative change of the loss)
        if (abs(loss.last - loss) < eps * loss.last) {
            break
        }
    }

    list(eta1 = meanX, betas = betas, Omegas = Omegas)
}
# #### DEBUGGING
# library(tensorPredictors)
# # setup dimensions
# n <- 1e3
# p <- c(5, 7, 3)
# q <- c(2, 5, 3)
# max.iter <- 10L
# # create "true" GLM parameters
# eta1 <- array(rnorm(prod(p)), dim = p)
# betas <- Map(matrix, Map(rnorm, p * q), p)
# Omegas <- Map(function(p_j) {
#     solve(0.5^abs(outer(seq_len(p_j), seq_len(p_j), `-`)))
# }, p)
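# # (Note: 0.5^|i - j| is an AR(1) covariance; its inverse, i.e. the true
# #  Omega_j, is tridiagonal, which motivates the tridiagonal `proj.Omegas`
# #  constraint demonstrated below.)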
# true.params <- list(eta1 = eta1, betas = betas, Omegas = Omegas)
# # compute tensor normal parameters from GMLM parameters
# Sigmas <- Map(solve, Omegas)
# mu <- mlm(eta1, Sigmas)
# # sample some test data
# sample.axis <- length(p) + 1L
# F <- array(rnorm(n * prod(q)), dim = c(q, n))
# X <- mlm(F, Map(`%*%`, Sigmas, betas)) + rtensornorm(n, mu, Sigmas, sample.axis)
# # setup a logging callback
# format <- sprintf("iter: %%3d, %s, Omega: %%8.2f, resid: %%8.3f\n",
#     paste0("beta.", seq_along(betas), ": %.3f", collapse = ", ")
# )
# hist <- data.frame(
#     iter = seq(0, max.iter),
#     dist.B = numeric(max.iter + 1),
#     dist.Omega = numeric(max.iter + 1),
#     resid = numeric(max.iter + 1)
# )
# # (the `...` absorbs the `loss` argument also passed by `gmlm_tensor_normal`)
# logger <- function(iter, betas, Omegas, resid, ...) {
#     dist.B <- dist.kron.norm(true.params$betas, betas)
#     dist.Omega <- dist.kron.norm(true.params$Omegas, Omegas)
#     resid <- mean(resid^2)
#     hist[iter + 1, -1] <<- c(dist.B = dist.B, dist.Omega = dist.Omega, resid = resid)
#     cat(do.call(sprintf, c(
#         list(format, iter),
#         Map(dist.subspace, betas, true.params$betas),
#         dist.Omega, resid
#     )))
# }
# # run the model
# est.params <- gmlm_tensor_normal(X, F, max.iter = max.iter, logger = logger)
# # run model with Omegas in sub-manifolds of SPD matrices
# est.params <- gmlm_tensor_normal(X, F, max.iter = max.iter, logger = logger,
#     proj.Omegas = list(
#         NULL,
#         (function(nrow, ncol) {
#             triDiag.mask <- (abs(.row(c(nrow, ncol)) - .col(c(nrow, ncol))) <= 1)
#             storage.mode(triDiag.mask) <- "double"
#             function(A) { A * triDiag.mask }
#         })(p[2], p[2]),
#         function(A) diag(diag(A))
#     ))
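# # The `proj.betas` argument works analogously (it is not exercised above);
# # a hypothetical sketch constraining the second mode's `beta` to rank 1 by
# # SVD truncation (the closest rank-1 matrix by Eckart-Young):
# est.params <- gmlm_tensor_normal(X, F, max.iter = max.iter, logger = logger,
#     proj.betas = list(
#         NULL,
#         function(B) with(La.svd(B, 1L, 1L), d[1] * (u %*% vt)),
#         NULL
#     ))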
# # # Profile the fitting routine without logging
# # profvis::profvis({
# #     est.params <- gmlm_tensor_normal(X, F, max.iter = max.iter)
# # })
# with(hist, {
#     plot(range(iter), range(hist[, -1]), type = "n", log = "y")
#     lines(iter, resid, col = "red")
#     lines(iter, dist.B, col = "green")
#     lines(iter, dist.Omega, col = "blue")
#     legend("topright", legend = c("resid", "dist.B", "dist.Omega"),
#         col = c("red", "green", "blue"), lty = 1)
# })
# # par(mfrow = c(1, 2))
# # matrixImage(Reduce(kronecker, rev(est.params$Omegas)))
# # matrixImage(Reduce(kronecker, rev(true.params$Omegas)))
# # # matrixImage(Reduce(kronecker, rev(est.params$Omegas)) - Reduce(kronecker, rev(true.params$Omegas)))
# # par(mfrow = c(1, 2))
# # matrixImage(Reduce(kronecker, rev(true.params$betas)), main = "True", col = hcl.colors(48, palette = "Blue-Red 3"))
# # matrixImage(Reduce(kronecker, rev(est.params$betas)), main = "Est", col = hcl.colors(48, palette = "Blue-Red 3"))
# # # unlist(Map(kappa, mcov(X)))
# # # unlist(Map(kappa, Omegas))
# # # unlist(Map(kappa, Sigmas))
# # # max(unlist(Map(function(C) max(eigen(C)$values), mcov(X))))
# # # min(unlist(Map(function(C) min(eigen(C)$values), mcov(X))))
# # # Map(function(C) eigen(C)$values, mcov(X))
# # # kappa(Reduce(kronecker, mcov(X)))
# # kappa1.fun <- function(Omegas) Map(kappa, Omegas)
# # kappa2.fun <- function(Omegas) Map(kappa, Omegas, exact = TRUE)
# # eigen1.fun <- function(Omegas) {
# #     Map(function(Omega) {
# #         min_max <- range(eigen(Omega)$values)
# #         min_max[2] / min_max[1]
# #     }, Omegas)
# # }
# # eigen2.fun <- function(Omegas) {
# #     Map(function(Omega) {
# #         min_max <- range(eigen(Omega, TRUE, TRUE)$values)
# #         min_max[2] / min_max[1]
# #     }, Omegas)
# # }
# # microbenchmark::microbenchmark(
# #     kappa1.fun(Omegas),
# #     kappa2.fun(Omegas),
# #     eigen1.fun(Omegas),
# #     eigen2.fun(Omegas)
# # )