fix: gmlm_tensor_normal loss calc changed to numerically more stable version,

add: matrix rownames, colnames support to matrixImage
2023-12-30 10:04:35 +01:00 · 2023-12-30 10:04:35 +01:00 · 13d3c63575
commit 13d3c63575
parent fa2a99f3f0
4 changed files with 74 additions and 124 deletions
--- a/sim/sim-tsir.R
+++ b/sim/sim-tsir.R
@ -1,35 +1,49 @@
 library(tensorPredictors)
+suppressPackageStartupMessages(library(Rdimtools))
+
+# Source utility function used in most simulations (extracted for convenience)
+setwd("~/Work/tensorPredictors/sim/")
+source("./sim_utils.R")

 # Data set sample size in every simulation
 sample.size <- 500L
 # Nr. of per simulation replications
-reps <- 100L
+reps <- 10L
 # number of observation/response axes (order of the tensors)
 orders <- c(2L, 3L, 4L)
 # auto correlation coefficient for the mode-wise auto scatter matrices `Omegas`
-rhos <- seq(0, 0.8, by = 0.1)
+rhos <- seq(0, 0.8, by = 0.2)


 setwd("~/Work/tensorPredictors/sim/")
-base.name <- format(Sys.time(), "failure_of_tsir-%Y%m%dT%H%M")
-
+base.name <- format(Sys.time(), "sim-tsir-%Y%m%dT%H%M")
 # data sampling routine
 sample.data <- function(sample.size, betas, Omegas) {
    dimF <- mapply(ncol, betas)

    # responce is a standard normal variable
-    y <- rnorm(sample.size)
+    y <- sort(rnorm(sample.size))
    y.pow <- Reduce(function(a, b) outer(a, b, `+`), Map(seq, 0L, len = dimF))
-    F <- t(outer(y, as.vector(y.pow), `^`))
+    F <- t(outer(y, as.vector(y.pow), `^`)) / as.vector(factorial(y.pow))
    dim(F) <- c(dimF, sample.size)

+    matplot(mat(F, length(dim(F))), type = "l")
+    abline(h = 0, lty = "dashed")
+
+    matplot(y, scale(mat(F, length(dim(F))), scale = FALSE), type = "l")
+    abline(h = 0, lty = "dashed")
+
+
    # sample predictors from tensor normal X | Y = y (last axis is sample axis)
    sample.axis <- length(betas) + 1L
    Sigmas <- Map(solve, Omegas)
    mu_y <- mlm(F, Map(`%*%`, Sigmas, betas))
    X <- mu_y + rtensornorm(sample.size, 0, Sigmas, sample.axis)

-    list(X = X, F = F, y = y, sample.axis = sample.axis)
+    # Make `y` into a `Y` tensor with variable axis all of dim 1
+    Y <- array(y, dim = c(rep(1L, length(dimF)), sample.size))
+
+    list(X = X, F = F, Y = Y, sample.axis = sample.axis)
 }

 # Create a CSV logger to write simulation results to
@ -37,7 +51,7 @@ log.file <- paste(base.name, "csv", sep = ".")
 logger <- CSV.logger(
    file.name = log.file,
    header = c("rho", "order", "sample.size", "rep", "beta.version", outer(
-        "dist.subspace", c("gmlm", "tsir", "sir"),
+        "dist.subspace", c("gmlm", "gmlm.1d", "tsir", "sir"),
        paste, sep = "."
    ))
 )
@ -66,22 +80,25 @@ for (order in orders) {
        # Version 1: repeated simulations
        for (rep in seq_len(reps)) {
            # Sample training data
-            c(X, F, y, sample.axis) %<-% sample.data(sample.size, betas, Omegas)
+            c(X, F, Y, sample.axis) %<-% sample.data(sample.size, betas, Omegas)

            # Fit models to provided data
-            fit.gmlm <- gmlm_tensor_normal(X, F, sample.axis = sample.axis, proj.betas = proj.betas)
-            fit.tsir <- TSIR(X, y, d = rep(1L, order), sample.axis = sample.axis)
-            fit.sir  <- SIR(mat(X, sample.axis), y, d = 1L)
+            fit.gmlm  <- gmlm_tensor_normal(X, F, sample.axis = sample.axis, proj.betas = proj.betas)
+            fit.gmlm.y <- gmlm_tensor_normal(X, Y, sample.axis = sample.axis)
+            fit.tsir  <- TSIR(X, drop(Y), d = rep(1L, order), sample.axis = sample.axis)
+            fit.sir   <- do.sir(mat(X, sample.axis), drop(Y), ndim = 1L)

            # Extract minimal reduction matrices from fitted models
-            B.gmlm <- qr.Q(qr(Reduce(kronecker, rev(fit.gmlm$betas))))[, 1L, drop = FALSE]
-            B.tsir <- Reduce(kronecker, rev(fit.tsir))
-            B.sir  <- fit.sir
+            B.gmlm  <- qr.Q(qr(Reduce(kronecker, rev(fit.gmlm$betas))))[, 1L, drop = FALSE]
+            B.gmlm.y <- Reduce(kronecker, rev(fit.gmlm.y$betas))
+            B.tsir  <- Reduce(kronecker, rev(fit.tsir))
+            B.sir   <- fit.sir$projection

            # Compute estimation to true minimal `B` distance
-            dist.subspace.gmlm <- dist.subspace(B.min.true, B.gmlm, normalize = TRUE)
-            dist.subspace.tsir <- dist.subspace(B.min.true, B.tsir, normalize = TRUE)
-            dist.subspace.sir  <- dist.subspace(B.min.true, B.sir,  normalize = TRUE)
+            dist.subspace.gmlm   <- dist.subspace(B.min.true, B.gmlm,   normalize = TRUE)
+            dist.subspace.gmlm.y <- dist.subspace(B.min.true, B.gmlm.y, normalize = TRUE)
+            dist.subspace.tsir   <- dist.subspace(B.min.true, B.tsir,   normalize = TRUE)
+            dist.subspace.sir    <- dist.subspace(B.min.true, B.sir,    normalize = TRUE)

            # Write to simulation log file (CSV file)
            logger()
@ -104,22 +121,25 @@ for (order in orders) {
        # Version 2: repeated simulations (identical to Version 1)
        for (rep in seq_len(reps)) {
            # Sample training data
-            c(X, F, y, sample.axis) %<-% sample.data(sample.size, betas, Omegas)
+            c(X, F, Y, sample.axis) %<-% sample.data(sample.size, betas, Omegas)

            # Fit models to provided data
-            fit.gmlm <- gmlm_tensor_normal(X, F, sample.axis = sample.axis, proj.betas = proj.betas)
-            fit.tsir <- TSIR(X, y, d = rep(1L, order), sample.axis = sample.axis)
-            fit.sir  <- SIR(mat(X, sample.axis), y, d = 1L)
+            fit.gmlm  <- gmlm_tensor_normal(X, F, sample.axis = sample.axis, proj.betas = proj.betas)
+            fit.gmlm.y <- gmlm_tensor_normal(X, Y, sample.axis = sample.axis)
+            fit.tsir  <- TSIR(X, drop(Y), d = rep(1L, order), sample.axis = sample.axis)
+            fit.sir   <- do.sir(mat(X, sample.axis), drop(Y), ndim = 1L)

            # Extract minimal reduction matrices from fitted models
-            B.gmlm <- qr.Q(qr(Reduce(kronecker, rev(fit.gmlm$betas))))[, 1L, drop = FALSE]
-            B.tsir <- Reduce(kronecker, rev(fit.tsir))
-            B.sir  <- fit.sir
+            B.gmlm  <- qr.Q(qr(Reduce(kronecker, rev(fit.gmlm$betas))))[, 1L, drop = FALSE]
+            B.gmlm.y <- Reduce(kronecker, rev(fit.gmlm.y$betas))
+            B.tsir  <- Reduce(kronecker, rev(fit.tsir))
+            B.sir   <- fit.sir$projection

            # Compute estimation to true minimal `B` distance
-            dist.subspace.gmlm <- dist.subspace(B.min.true, B.gmlm, normalize = TRUE)
-            dist.subspace.tsir <- dist.subspace(B.min.true, B.tsir, normalize = TRUE)
-            dist.subspace.sir  <- dist.subspace(B.min.true, B.sir,  normalize = TRUE)
+            dist.subspace.gmlm   <- dist.subspace(B.min.true, B.gmlm,   normalize = TRUE)
+            dist.subspace.gmlm.y <- dist.subspace(B.min.true, B.gmlm.y, normalize = TRUE)
+            dist.subspace.tsir   <- dist.subspace(B.min.true, B.tsir,   normalize = TRUE)
+            dist.subspace.sir    <- dist.subspace(B.min.true, B.sir,    normalize = TRUE)

            # Write to simulation log file (CSV file)
            logger()
@ -155,6 +175,13 @@ layout(rbind(
    2 * length(orders) + 1
 ), heights = c(rep(6L, length(orders)), 1L))

+col.methods <- c(
+    gmlm   = "#000000",
+    gmlm.y = "#FF0000",
+    tsir   = "#009E73",
+    sir    = "#999999"
+)
+
 for (group in split(aggr, aggr[c("order", "beta.version")])) {
    order <- group$order[[1]]
    beta.version <- group$beta.version[[1]]
@ -166,9 +193,10 @@ for (group in split(aggr, aggr[c("order", "beta.version")])) {
    axis(1, at = rho)
    axis(2, at = seq(0, 1, by = 0.2))
    with(group, {
-        lines(rho, dist.subspace.gmlm, col = col.methods["gmlm"], lwd = 3, type = "b", pch = 16)
-        lines(rho, dist.subspace.tsir, col = col.methods["tsir"], lwd = 2, type = "b", pch = 16)
-        lines(rho, dist.subspace.sir,  col = col.methods["sir"],  lwd = 2, type = "b", pch = 16)
+        lines(rho, dist.subspace.gmlm,   col = col.methods["gmlm"],   lwd = 3, type = "b", pch = 16)
+        lines(rho, dist.subspace.gmlm.y, col = col.methods["gmlm.y"], lwd = 3, type = "b", pch = 16)
+        lines(rho, dist.subspace.tsir,   col = col.methods["tsir"],   lwd = 2, type = "b", pch = 16)
+        lines(rho, dist.subspace.sir,    col = col.methods["sir"],    lwd = 2, type = "b", pch = 16)
    })
    if (order == 3L && beta.version == 2L) {
        abline(v = 0.5, lty = "dotted", col = "black")
@ -176,49 +204,7 @@ for (group in split(aggr, aggr[c("order", "beta.version")])) {
            lty = "dotted", col = "black")
    }
 }
-methods <- c("GMLM", "TSIR", "SIR")
-restor.par <- par(
-    fig = c(0, 1, 0, 1),
-    oma = c(0, 0, 0, 0),
-    mar = c(1, 0, 0, 0),
-    new = TRUE
-)
-plot(0, type = "n", bty = "n", axes = FALSE, xlab = "", ylab = "")
-legend("bottom", col = col.methods[tolower(methods)], legend = methods,
-    horiz = TRUE, lty = 1, bty = "n", lwd = c(3, 2, 2), pch = 16)
-par(restor.par)
-
-
-# new grouping for the aggregates
-layout(rbind(
-    matrix(seq_len(2 * 3), ncol = 2),
-    2 * 3 + 1
-), heights = c(rep(6L, 3), 1L))
-
-for (group in split(aggr, aggr[c("rho", "beta.version")])) {
-    rho <- group$rho[[1]]
-    beta.version <- group$beta.version[[1]]
-
-    if (!(rho %in% c(0, .5, .8))) { next }
-
-    order <- group$order
-
-    plot(range(order), 0:1, main = sprintf("V%d, rho %.1f", beta.version, rho),
-        type = "n", bty = "n", axes = FALSE, xlab = expression(order), ylab = "Subspace Distance")
-    axis(1, at = order)
-    axis(2, at = seq(0, 1, by = 0.2))
-    with(group, {
-        lines(order, dist.subspace.gmlm, col = col.methods["gmlm"], lwd = 3, type = "b", pch = 16)
-        lines(order, dist.subspace.tsir, col = col.methods["tsir"], lwd = 2, type = "b", pch = 16)
-        lines(order, dist.subspace.sir,  col = col.methods["sir"],  lwd = 2, type = "b", pch = 16)
-    })
-    if (rho == 0.5 && beta.version == 2L) {
-        abline(v = 0.5, lty = "dotted", col = "black")
-        abline(h = group$dist.subspace.tsir[which(order == 3L)],
-            lty = "dotted", col = "black")
-    }
-}
-methods <- c("GMLM", "TSIR", "SIR")
+methods <- c("GMLM", "GMLM.y", "TSIR", "SIR")
 restor.par <- par(
    fig = c(0, 1, 0, 1),
    oma = c(0, 0, 0, 0),
--- a/tensorPredictors/R/gmlm_tensor_normal.R
+++ b/tensorPredictors/R/gmlm_tensor_normal.R
@ -88,7 +88,7 @@ gmlm_tensor_normal <- function(X, F, sample.axis = length(dim(X)),
        # Residuals
        R <- X - mlm(F, Map(`%*%`, Sigmas, betas))

-        # Covariance Estimates (moment based, TODO: implement MLE estimate!)
+        # Covariance Estimates
        Sigmas <- mcov(R, sample.axis, center = FALSE)

        # Computing `Omega_j`s, the j'th mode presition matrices, in conjunction
@ -111,9 +111,16 @@ gmlm_tensor_normal <- function(X, F, sample.axis = length(dim(X)),
            }
        }

-        # store last loss and compute new value
+        # store last loss
        loss.last <- loss
-        loss <- mean(R * mlm(R, Omegas)) - sum(log(mapply(det, Omegas)) / dimX)
+        # Numerically more stable version of `sum(log(mapply(det, Omegas)) / dimX)`
+        # which is itself equivalent to `log(det(Omega)) / prod(nrow(Omega))` where
+        # `Omega <- Reduce(kronecker, rev(Omegas))`.
+        det.Omega <- sum(mapply(function(Omega) {
+            sum(log(eigen(Omega, TRUE, TRUE)$values))
+        }, Omegas) / dimX)
+        # Compute new loss
+        loss <- mean(R * mlm(R, Omegas)) - det.Omega

        # invoke the logger
        if (is.function(logger)) do.call(logger, list(
--- a/tensorPredictors/R/mat_mani_projections.R
+++ b/tensorPredictors/R/mat_mani_projections.R
@ -73,48 +73,3 @@ projStiefel <- function(A) {
        `[<-`(matrix(0, nrow(A), ncol(A)), mask, mean(A[mask]))
    }
 }
-
-#' Projections onto matrix manifolds
-#'
-#' @examples
-#' p <- 5
-#' q <- 4
-#' A <- matrix(rnorm(p * q), p, q)
-#'
-#' # General Matrices
-#' matProj("TriDiag", dim(A))(A)
-#' matProj("Band", dim(A), low = 1, high = 2)(A)
-#' matProj("Rank", rank = 2)(A)
-#' matProj("Stiefel")(A)
-#'
-#' # Symmetric projections need square matrices
-#' S <- matrix(rnorm(p^2), p)
-#'
-#' matProj("Sym")(S)
-#' matProj("SymTriDiag", dim(S))(S)
-#' matProj("SymBand", dim(S), low = 1, high = 2)(S)
-#' matProj("PSD")(S)
-#' matProj("SymRank", rank = 1)(S)
-#'
-#' @rdname matProj
-#'
-#' @export
-matProj <- function(manifold, dims = NULL, low = NULL, high = NULL, sym = FALSE, rank = NULL) {
-
-    switch(tolower(manifold),
-        identity    = identity,
-        sym         = projSym,
-        tridiag     = .projBand(dims, 1L, 1L),
-        symtridiag  = .projSymBand(dims, 1L, 1L),
-        band        = .projBand(dims, low, high),
-        symband     = .projSymBand(dims, low, high),
-        psd         = .projPSD(sym),
-        rank        = .projRank(rank),
-        symrank     = .projSymRank(rank),
-        stiefel     = projStiefel
-    )
-
-}
-
-# #' Basis of ....
-# mat.proj.basis <- function(manifold, dims = NULL, low = NULL, high = NULL, sym = FALSE, rank = NULL) ...
--- a/tensorPredictors/R/matrixImage.R
+++ b/tensorPredictors/R/matrixImage.R
@ -47,8 +47,10 @@ matrixImage <- function(A, add.values = FALSE,
    x <- seq(1, ncol(A), by = 1)
    y <- seq(1, nrow(A))
    if (axes && new.plot) {
-        axis(1, at = x - 0.5, labels = x, lwd = 0, lwd.ticks = 1)
-        axis(2, at = y - 0.5, labels = rev(y), lwd = 0, lwd.ticks = 1, las = 1)
+        if (!is.character(xlabels <- colnames(A))) { xlabels <- x }
+        if (!is.character(ylabels <- rownames(A))) { ylabels <- y }
+        axis(1, at = x - 0.5, labels = xlabels, lwd = 0, lwd.ticks = 1)
+        axis(2, at = y - 0.5, labels = rev(ylabels), lwd = 0, lwd.ticks = 1, las = 1)
    }

    # Writes matrix values