add: TSIR covariance regularization,

add: LSIR generalized to tensors
2025-05-02 11:53:03 +02:00 · 2025-05-02 11:53:03 +02:00 · 4bc10018e3
commit 4bc10018e3
parent 13f85bdc16
5 changed files with 123 additions and 70 deletions
--- a/LaTeX/images/reduction.tex
+++ b/LaTeX/images/reduction.tex
@ -1,4 +1,27 @@
-\begin{tikzpicture}[scale = \tikzscale], line width = 1pt]
+\documentclass{standalone}
+
+\usepackage[T1]{fontenc}
+\usepackage{tikz}
+\usepackage{amsmath,amssymb}
+
+% matrices
+\newcommand*{\mat}[1]{\boldsymbol{#1}}
+% tensors (special case for lower case caligraphic letters)
+\newcommand*{\ten}[1]{
+    \ifnum\pdfstrcmp{#1}{`}=1       % lowercase argument
+    \mathfrak{#1}
+    \else                           % uppercase argument
+    \mathcal{#1}
+    \fi
+}
+% matrix transpose
+\renewcommand*{\t}[1]{{#1^{T}}}
+% Expected Value
+\newcommand{\E}{\operatorname{\mathbb{E}}}
+
+
+\begin{document}
+\begin{tikzpicture}[scale = 1.2, line width = 1pt]

    \def\rect#1#2#3{
        \draw (0, 0, 0) -- (#1, 0, 0) -- (#1, #2, 0) -- (0, #2, 0) -- cycle;
@ -28,9 +51,10 @@
    \draw[fill = lightgray, fill opacity = 0.7]
        (0, 2.1, 0) -- (0, 2.1, -2) -- (0, 6.1, -2) -- (0, 6.1, 0) -- cycle;
    \node[opacity = 0.7, cm={0.66, 0.66, 0, 1, (0, 0)}]
-        at (0, 4.1, -1.1) {$\t{\mat{\beta}_3}$};
+        at (0, 5.3, -1.1) {$\t{\mat{\beta}_3}$};

    \draw[lightgray, line width = 0.7pt] (0, 5.1) arc (90:0:3);
    \draw[fill = lightgray, fill opacity = 0.7] (0, 2.1) rectangle +(1.5, 3)
        node [pos = 0.5] {$\t{\mat{\beta}_2}$};
 \end{tikzpicture}
+\end{document}
--- a/LaTeX/plots/aggr-2d-ising.csv
+++ b/LaTeX/plots/aggr-2d-ising.csv
@ -1,6 +1,6 @@
 sample.size rep time.gmlm dist.subspace.gmlm dist.projection.gmlm time.tnormal dist.subspace.tnormal dist.projection.tnormal time.pca dist.subspace.pca dist.projection.pca time.hopca dist.subspace.hopca dist.projection.hopca time.lpca dist.subspace.lpca dist.projection.lpca time.clpca dist.subspace.clpca dist.projection.clpca time.tsir dist.subspace.tsir dist.projection.tsir time.mgcca dist.subspace.mgcca dist.projection.mgcca
-100 50.5 3.03076 0.3036349361 0.3036349361 -1 0.2868643362 0.2868643362 0.00073 0.818177689 0.984435713 0.00063 0.755946643 0.755946643 0.05648 0.819181741 0.953681387 0.0867 0.822312566 0.986781953 0.00613 0.4019671947 0.4019671947 0.04573 0.588313518 0.75377487
-200 50.5 3.10805 0.1992945381 0.1992945381 -1 0.2020230182 0.2020230182 0.00074 0.814338816 0.987890451 0.00093 0.690811228 0.690811228 0.05801 0.81457008 0.961377759 0.08991 0.825394861 0.991647771 0.00599 0.2511707246 0.2511707246 0.04656 0.476761868 0.629632653
-300 50.5 3.3052 0.1546113425 0.1546113425 -1 0.1537117318 0.1537117318 0.00075 0.798497064 0.993693989 0.00099 0.729634976 0.729634976 0.05696 0.820224046 0.982252313 0.09375 0.806834524 0.992228985 0.00636 0.1790215654 0.1790215654 0.04835 0.37049092 0.4917621054
-500 50.5 3.88258 0.1130383355 0.1130383355 -1 0.11118393534 0.11118393534 0.00095 0.817445534 0.996960022 0.00148 0.651972301 0.651972301 0.07627 0.816560225 0.989645322 0.11561 0.837453121 0.996154736 0.00715 0.1217933129 0.1217933129 0.06199 0.2348135641 0.3010364601
-750 50.5 3.97107 0.09585699718 0.09585699718 -1 0.09463671116 0.09463671116 0.00106 0.829576522 0.996509465 0.00179 0.6203066621 0.6203066621 0.09122 0.820908322 0.988956119 0.13158 0.849250091 0.997066922 0.00765 0.10393120662 0.10393120662 0.06512 0.1950222554 0.2482127769
+100 50.5 4.78067 0.2687172315 0.2687172315 -1 0.2740385439 0.2740385439 0.00142 0.879931169 0.955995509 0.00105 0.927846756 0.927846756 0.08114 0.823009087 0.924925441 0.12154 0.856587662 0.952690667 0.00723 0.4439691501 0.4439691501 0.05715 0.686201489 0.888446221
+200 50.5 4.89815 0.2088401765 0.2088401765 -1 0.2130251361 0.2130251361 0.00154 0.892216118 0.956475415 0.00147 0.955911213 0.955911213 0.08683 0.821679399 0.918999512 0.12544 0.867839167 0.956083632 0.00747 0.2895647172 0.2895647172 0.05528 0.618966058 0.818845212
+300 50.5 4.61123 0.1607295666 0.1607295666 -1 0.1669095997 0.1669095997 0.0015 0.895647105 0.951015895 0.00166 0.96455755 0.96455755 0.09651 0.823301202 0.926266165 0.12515 0.866042789 0.954992902 0.00736 0.1983123012 0.1983123012 0.04981 0.584343635 0.780637806
+500 50.5 4.02391 0.12353912092 0.12353912092 -1 0.12367372452 0.12367372452 0.00136 0.901367785 0.949912861 0.00178 0.976238781 0.976238781 0.10512 0.821751266 0.917064858 0.11746 0.882995254 0.962274667 0.00687 0.1500581675 0.1500581675 0.04545 0.586647965 0.797306219
+750 50.5 4.01303 0.1039492242 0.1039492242 -1 0.10122631012 0.10122631012 0.00144 0.903545694 0.947406206 0.00235 0.982184029 0.982184029 0.12965 0.821035531 0.916146581 0.13098 0.878463364 0.957372071 0.00792 0.11926051548 0.11926051548 0.05085 0.560734835 0.763031534
--- a/tensorPredictors/R/LSIR.R
+++ b/tensorPredictors/R/LSIR.R
@ -3,66 +3,69 @@
 #' @param X matrix of dim \eqn{n \times p t} with each row representing a
 #'  vectorized \eqn{p \times t} observation.
 #' @param y vector of \eqn{n} elements as factors. (can be coersed to factors)
-#' @param p,t,k,r dimensions.
-#'
-#' @returns a list with components
-#'  alpha: matrix of \eqn{t \times r}
-#'  beta: matrix of \eqn{p \times k}
 #'
 #' TODO: finish
 #'
 #' @export
-LSIR <- function(X, y, p, t, k = 1L, r = 1L) {
-    # the code assumes:
-    # alpha: T x r, beta: p x k, X_i: p x T, for ith observation
-
-    # Check and transform parameters.
-    if (!is.matrix(X)) X <- as.matrix(X)
-    n <- nrow(X)
-    stopifnot(
-        ncol(X) == p * t,
-        n == length(y)
-    )
-    if (!is.factor(y)) y <- factor(y)
-
-    # Restructure X into a 3D tensor with axis (observations, predictors, time).
-    dim(X) <- c(n, p, t)
-
-    # Estimate predictor/time covariance matrices \hat{Sigma}_1, \hat{Sigma}_2.
-    sigma_p <- matrix(rowMeans(apply(X, 3, cov)), p, p)
-    sigma_t <- matrix(rowMeans(apply(X, 2, cov)), t, t)
-
-    # Normalize X as vec(Z) = Sigma^-1/2 (vec(X) - E(vec(X)))
-    dim(X) <- c(n, p * t)
-    sigma_p_isqrt <- matpow(sigma_p, -0.5)
-    sigma_t_isqrt <- matpow(sigma_t, -0.5)
-    Z <- scale(X, scale = FALSE) %*% kronecker(sigma_t_isqrt, sigma_p_isqrt)
-    # Both as 3D tensors.
-    dim(X) <- dim(Z) <- c(n, p, t)
-
-    # Estimate the conditional predictor/time covariance matrix Omega = cov(E(Z|Y)).
-    omega_p <- matrix(Reduce(`+`, lapply(levels(y), function(l) {
-        rowMeans(apply(Z[y == l, , ], 3, function(z) {
-            (nrow(z) / n) * tcrossprod(colMeans(z))
-        }))
-    })), p, p)
-    omega_t <- matrix(Reduce(`+`, lapply(levels(y), function(l) {
-        rowMeans(apply(Z[y == l, , ], 2, function(z) {
-            (nrow(z) / n) * tcrossprod(colMeans(z))
-        }))
-    })), t, t)
-    omega <- kronecker(omega_t, omega_p)
-
-    # Compute seperate SVD of estimated omega's and use that for an estimate of
-    # a central subspace basis.
-    svd_p <- La.svd(omega_p)
-    svd_t <- La.svd(omega_t)
-    beta  <- sigma_p_isqrt %*% svd_p$u[, k]
-    alpha <- sigma_t_isqrt %*% svd_t$u[, r]
-
-    return(list(sigma_p = sigma_p, sigma_t = sigma_t,
-                sigma = kronecker(sigma_t, sigma_p),
-                alpha = alpha, beta = beta,
-                Delta = omega,
-                B = kronecker(alpha, beta)))
+LSIR <- function(X, y,
+    reduction.dims = rep(1L, length(dim(X)) - 1L),
+    sample.axis = 1L,
+    nr.slices = 10L,    # default slices, ignored if y is a factor or integer
+    eps = sqrt(.Machine$double.eps),
+    slice.method = c("cut", "ecdf")   # ignored if y is a factor or integer
+) {
+    # In case of `y` not descrete, group `y` into slices
+    if (!(is.factor(y) || is.integer(y))) {
+        slice.method <- match.arg(slice.method)
+        if (slice.method == "ecdf") {
+            y <- cut(ecdf(y)(y), nr.slices)
+        } else {
+            y <- cut(y, nr.slices)
+            # ensure there are no empty slices
+            if (any(table(y) == 0)) {
+                y <- as.factor(as.integer(y))
+            }
+        }
+    }
+    if (!is.factor(y)) {
+        y <- factor(y)
+    }
+
+    stopifnot(dim(X)[sample.axis] == length(y))
+
+    # rearrange `X` such that the first axis enumerates observations
+    axis.perm <- c(sample.axis, seq_along(dim(X))[-sample.axis])
+    X <- aperm(X, axis.perm)
+
+    modes <- seq_along(dim(X))[-1L]
+    n <- dim(X)[1L]
+
+    sigmas <- lapply(seq_along(modes), function(i) {
+        matrix(rowMeans(apply(X, modes[-i], cov)), dim(X)[modes[i]])
+    })
+    # Omega_i = Sigma_i^{-1 / 2}
+    isqrt_sigmas <- Map(matpow, sigmas, -1 / 2)
+
+    # Normalize observations
+    Z <- mlm(X - rep(colMeans(X), each = dim(X)[1L]), isqrt_sigmas, modes = modes)
+    
+    # Estimate conditional covariances Omega = Cov(E[Z | Y])
+    slice.args <- c(
+        list(Z), rep(alist(, )[1], length(dim(X))), list(drop = FALSE)
+    )
+    omegas <- lapply(seq_along(modes), function(i) {
+        matrix(Reduce(`+`, lapply(levels(y), function(l) {
+            slice.args[[2]] <- y == l
+            rowMeans(apply(do.call(`[`, slice.args), modes[-i], function(z) {
+                (nrow(z) / n) * tcrossprod(colMeans(z))
+            }))
+        })), dim(X)[modes[i]])
+    })
+
+    # Compute central subspace basis estimate
+    betas <- mapply(function(isqrt_sigma, omega, reduction_dim) {
+        isqrt_sigma %*% La.svd(omega, reduction_dim, 0L)$u
+    }, isqrt_sigmas, omegas, reduction.dims, SIMPLIFY = FALSE)
+
+    list(betas = betas)
 }
--- a/tensorPredictors/R/TSIR.R
+++ b/tensorPredictors/R/TSIR.R
@ -1,11 +1,14 @@
 #' Tensor Sliced Inverse Regression
 #'
 #' @export
-TSIR <- function(X, y, d, sample.axis = 1L,
+TSIR <- function(X, y, 
+    reduction.dims = rep(1L, length(dim(X)) - 1L),
+    sample.axis = 1L,
    nr.slices = 10L,    # default slices, ignored if y is a factor or integer
    max.iter = 50L,
-    eps = sqrt(.Machine$double.eps),
-    slice.method = c("cut", "ecdf")   # ignored if y is a factor or integer
+    cond.threshold = Inf, eps = sqrt(.Machine$double.eps),
+    slice.method = c("cut", "ecdf"),  # ignored if y is a factor or integer
+    logger = NULL
 ) {

    if (!(is.factor(y) || is.integer(y))) {
@ -23,7 +26,7 @@ TSIR <- function(X, y, d, sample.axis = 1L,

    stopifnot(exprs = {
        dim(X)[sample.axis] == length(y)
-        length(d) + 1L == length(dim(X))
+        length(reduction.dims) + 1L == length(dim(X))
    })

    # rearrange `X`, `Fy` such that the last axis enumerates observations
@ -60,7 +63,7 @@ TSIR <- function(X, y, d, sample.axis = 1L,
            mcrossprod(rowMeans(X_s - c(mu_s), dims = length(modes)), mode = k)
        }, slices, slice.means, slice.sizes)) / nr.slices
    }, modes)
-    Gammas <- Map(`[[`, Map(La.svd, Sigmas, nu = d, nv = 0), list("u"))
+    Gammas <- Map(`[[`, Map(La.svd, Sigmas, nu = reduction.dims, nv = 0), list("u"))

    # setup projections as `Proj_k = Gamma_k Gamma_k'`
    projections <- Map(tcrossprod, Gammas)
@ -70,6 +73,12 @@ TSIR <- function(X, y, d, sample.axis = 1L,
        n_s * sum((X_s - mlm(X_s, projections))^2)
    }, slice.means, slice.sizes))

+    # invoke the logger
+    if (is.function(logger)) do.call(logger, list(
+        iter = 0L, Gammas = Gammas, Sigmas = Sigmas, loss = loss.last
+    ))
+
+
    # Iterate till convergence
    for (iter in seq_len(max.iter)) {
        # For each mode assume mode reduction `Gamma`s fixed and update
@ -86,7 +95,7 @@ TSIR <- function(X, y, d, sample.axis = 1L,
            }, slice.proj, slice.sizes))

            # Recompute mode `k` basis `Gamma_k`
-            Gammas[[k]] <- La.svd(Sigmas[[k]], nu = d[k], nv = 0)$u
+            Gammas[[k]] <- La.svd(Sigmas[[k]], nu = reduction.dims[k], nv = 0)$u

            # update mode `k` projection matrix onto the new span of `Gamma_k`
            projections[[k]] <- tcrossprod(Gammas[[k]])
@ -97,6 +106,11 @@ TSIR <- function(X, y, d, sample.axis = 1L,
            }, slice.means, slice.sizes))
        }

+        # invoke the logger
+        if (is.function(logger)) do.call(logger, list(
+            iter = iter, Gammas = Gammas, Sigmas = Sigmas, loss = loss
+        ))
+
        # check break condition
        if (abs(loss - loss.last) < eps * loss) {
            break
@ -107,6 +121,18 @@ TSIR <- function(X, y, d, sample.axis = 1L,
    # mode sample covariance matrices
    Omegas <- Map(function(k) n^-1 * mcrossprod(X, mode = k), modes)

+    # Check condition of sample covariances and perform regularization if needed
+    if (is.finite(cond.threshold)) {
+        for (j in seq_along(Omegas)) {
+            # Compute min and max eigen values
+            min_max <- range(eigen(Omegas[[j]], TRUE, TRUE)$values)
+            # The condition is approximately `kappa(Omegas[[j]]) > cond.threshold`
+            if (min_max[2] > cond.threshold * min_max[1]) {
+                Omegas[[j]] <- Omegas[[j]] + diag(0.2 * min_max[2], nrow(Omegas[[j]]))
+            }
+        }
+    }
+
    # reductions matrices `Omega_k^-1 Gamma_k` where there (reverse) kronecker
    # product spans the central tensor subspace (CTS) estimate
    structure(Map(solve, Omegas, Gammas), mcov = Omegas, Gammas = Gammas)
--- a/tensorPredictors/R/gmlm_tensor_normal.R
+++ b/tensorPredictors/R/gmlm_tensor_normal.R
@ -47,7 +47,7 @@ gmlm_tensor_normal <- function(X, F, sample.axis = length(dim(X)),
    Omegas <- Map(diag, dimX)

    # Per mode covariance directions
-    # Note: (the directions are transposed!)
+    # Note: directions are transposed!
    dirsX <- Map(function(Sigma) {
        SVD <- La.svd(Sigma, nu = 0)
        sqrt(SVD$d) * SVD$vt
@ -85,8 +85,8 @@ gmlm_tensor_normal <- function(X, F, sample.axis = length(dim(X)),
    ### Phase 2: (Block) Coordinate Descent
    for (iter in seq_len(max.iter)) {

-        # update every beta (in random order)
-        for (j in sample.int(length(betas))) {
+        # update every beta
+        for (j in seq_along(betas)) {
            FxB_j <- mlm(F, betas[-j], modes[-j])
            FxSB_j <- mlm(FxB_j, Sigmas[-j], modes[-j])
            betas[[j]] <- Omegas[[j]] %*% t(solve(mcrossprod(FxSB_j, FxB_j, j), mcrossprod(FxB_j, X, j)))