add: TSIR covariance regularization,

add: LSIR generalized to tensors
This commit is contained in:
Daniel Kapla 2025-05-02 11:53:03 +02:00
parent 13f85bdc16
commit 4bc10018e3
5 changed files with 123 additions and 70 deletions

View File

@ -1,4 +1,27 @@
\begin{tikzpicture}[scale = \tikzscale], line width = 1pt]
\documentclass{standalone}
\usepackage[T1]{fontenc}
\usepackage{tikz}
\usepackage{amsmath,amssymb}
% matrices
\newcommand*{\mat}[1]{\boldsymbol{#1}}
% tensors (special case for lower case caligraphic letters)
\newcommand*{\ten}[1]{
\ifnum\pdfstrcmp{#1}{`}=1 % lowercase argument
\mathfrak{#1}
\else % uppercase argument
\mathcal{#1}
\fi
}
% matrix transpose
\renewcommand*{\t}[1]{{#1^{T}}}
% Expected Value
\newcommand{\E}{\operatorname{\mathbb{E}}}
\begin{document}
\begin{tikzpicture}[scale = 1.2, line width = 1pt]
\def\rect#1#2#3{
\draw (0, 0, 0) -- (#1, 0, 0) -- (#1, #2, 0) -- (0, #2, 0) -- cycle;
@ -28,9 +51,10 @@
\draw[fill = lightgray, fill opacity = 0.7]
(0, 2.1, 0) -- (0, 2.1, -2) -- (0, 6.1, -2) -- (0, 6.1, 0) -- cycle;
\node[opacity = 0.7, cm={0.66, 0.66, 0, 1, (0, 0)}]
at (0, 4.1, -1.1) {$\t{\mat{\beta}_3}$};
at (0, 5.3, -1.1) {$\t{\mat{\beta}_3}$};
\draw[lightgray, line width = 0.7pt] (0, 5.1) arc (90:0:3);
\draw[fill = lightgray, fill opacity = 0.7] (0, 2.1) rectangle +(1.5, 3)
node [pos = 0.5] {$\t{\mat{\beta}_2}$};
\end{tikzpicture}
\end{document}

View File

@ -1,6 +1,6 @@
sample.size rep time.gmlm dist.subspace.gmlm dist.projection.gmlm time.tnormal dist.subspace.tnormal dist.projection.tnormal time.pca dist.subspace.pca dist.projection.pca time.hopca dist.subspace.hopca dist.projection.hopca time.lpca dist.subspace.lpca dist.projection.lpca time.clpca dist.subspace.clpca dist.projection.clpca time.tsir dist.subspace.tsir dist.projection.tsir time.mgcca dist.subspace.mgcca dist.projection.mgcca
100 50.5 3.03076 0.3036349361 0.3036349361 -1 0.2868643362 0.2868643362 0.00073 0.818177689 0.984435713 0.00063 0.755946643 0.755946643 0.05648 0.819181741 0.953681387 0.0867 0.822312566 0.986781953 0.00613 0.4019671947 0.4019671947 0.04573 0.588313518 0.75377487
200 50.5 3.10805 0.1992945381 0.1992945381 -1 0.2020230182 0.2020230182 0.00074 0.814338816 0.987890451 0.00093 0.690811228 0.690811228 0.05801 0.81457008 0.961377759 0.08991 0.825394861 0.991647771 0.00599 0.2511707246 0.2511707246 0.04656 0.476761868 0.629632653
300 50.5 3.3052 0.1546113425 0.1546113425 -1 0.1537117318 0.1537117318 0.00075 0.798497064 0.993693989 0.00099 0.729634976 0.729634976 0.05696 0.820224046 0.982252313 0.09375 0.806834524 0.992228985 0.00636 0.1790215654 0.1790215654 0.04835 0.37049092 0.4917621054
500 50.5 3.88258 0.1130383355 0.1130383355 -1 0.11118393534 0.11118393534 0.00095 0.817445534 0.996960022 0.00148 0.651972301 0.651972301 0.07627 0.816560225 0.989645322 0.11561 0.837453121 0.996154736 0.00715 0.1217933129 0.1217933129 0.06199 0.2348135641 0.3010364601
750 50.5 3.97107 0.09585699718 0.09585699718 -1 0.09463671116 0.09463671116 0.00106 0.829576522 0.996509465 0.00179 0.6203066621 0.6203066621 0.09122 0.820908322 0.988956119 0.13158 0.849250091 0.997066922 0.00765 0.10393120662 0.10393120662 0.06512 0.1950222554 0.2482127769
100 50.5 4.78067 0.2687172315 0.2687172315 -1 0.2740385439 0.2740385439 0.00142 0.879931169 0.955995509 0.00105 0.927846756 0.927846756 0.08114 0.823009087 0.924925441 0.12154 0.856587662 0.952690667 0.00723 0.4439691501 0.4439691501 0.05715 0.686201489 0.888446221
200 50.5 4.89815 0.2088401765 0.2088401765 -1 0.2130251361 0.2130251361 0.00154 0.892216118 0.956475415 0.00147 0.955911213 0.955911213 0.08683 0.821679399 0.918999512 0.12544 0.867839167 0.956083632 0.00747 0.2895647172 0.2895647172 0.05528 0.618966058 0.818845212
300 50.5 4.61123 0.1607295666 0.1607295666 -1 0.1669095997 0.1669095997 0.0015 0.895647105 0.951015895 0.00166 0.96455755 0.96455755 0.09651 0.823301202 0.926266165 0.12515 0.866042789 0.954992902 0.00736 0.1983123012 0.1983123012 0.04981 0.584343635 0.780637806
500 50.5 4.02391 0.12353912092 0.12353912092 -1 0.12367372452 0.12367372452 0.00136 0.901367785 0.949912861 0.00178 0.976238781 0.976238781 0.10512 0.821751266 0.917064858 0.11746 0.882995254 0.962274667 0.00687 0.1500581675 0.1500581675 0.04545 0.586647965 0.797306219
750 50.5 4.01303 0.1039492242 0.1039492242 -1 0.10122631012 0.10122631012 0.00144 0.903545694 0.947406206 0.00235 0.982184029 0.982184029 0.12965 0.821035531 0.916146581 0.13098 0.878463364 0.957372071 0.00792 0.11926051548 0.11926051548 0.05085 0.560734835 0.763031534

1 sample.size rep time.gmlm dist.subspace.gmlm dist.projection.gmlm time.tnormal dist.subspace.tnormal dist.projection.tnormal time.pca dist.subspace.pca dist.projection.pca time.hopca dist.subspace.hopca dist.projection.hopca time.lpca dist.subspace.lpca dist.projection.lpca time.clpca dist.subspace.clpca dist.projection.clpca time.tsir dist.subspace.tsir dist.projection.tsir time.mgcca dist.subspace.mgcca dist.projection.mgcca
2 100 50.5 3.03076 0.3036349361 0.3036349361 -1 0.2868643362 0.2868643362 0.00073 0.818177689 0.984435713 0.00063 0.755946643 0.755946643 0.05648 0.819181741 0.953681387 0.0867 0.822312566 0.986781953 0.00613 0.4019671947 0.4019671947 0.04573 0.588313518 0.75377487 100 50.5 4.78067 0.2687172315 0.2687172315 -1 0.2740385439 0.2740385439 0.00142 0.879931169 0.955995509 0.00105 0.927846756 0.927846756 0.08114 0.823009087 0.924925441 0.12154 0.856587662 0.952690667 0.00723 0.4439691501 0.4439691501 0.05715 0.686201489 0.888446221
3 200 50.5 3.10805 0.1992945381 0.1992945381 -1 0.2020230182 0.2020230182 0.00074 0.814338816 0.987890451 0.00093 0.690811228 0.690811228 0.05801 0.81457008 0.961377759 0.08991 0.825394861 0.991647771 0.00599 0.2511707246 0.2511707246 0.04656 0.476761868 0.629632653 200 50.5 4.89815 0.2088401765 0.2088401765 -1 0.2130251361 0.2130251361 0.00154 0.892216118 0.956475415 0.00147 0.955911213 0.955911213 0.08683 0.821679399 0.918999512 0.12544 0.867839167 0.956083632 0.00747 0.2895647172 0.2895647172 0.05528 0.618966058 0.818845212
4 300 50.5 3.3052 0.1546113425 0.1546113425 -1 0.1537117318 0.1537117318 0.00075 0.798497064 0.993693989 0.00099 0.729634976 0.729634976 0.05696 0.820224046 0.982252313 0.09375 0.806834524 0.992228985 0.00636 0.1790215654 0.1790215654 0.04835 0.37049092 0.4917621054 300 50.5 4.61123 0.1607295666 0.1607295666 -1 0.1669095997 0.1669095997 0.0015 0.895647105 0.951015895 0.00166 0.96455755 0.96455755 0.09651 0.823301202 0.926266165 0.12515 0.866042789 0.954992902 0.00736 0.1983123012 0.1983123012 0.04981 0.584343635 0.780637806
5 500 50.5 3.88258 0.1130383355 0.1130383355 -1 0.11118393534 0.11118393534 0.00095 0.817445534 0.996960022 0.00148 0.651972301 0.651972301 0.07627 0.816560225 0.989645322 0.11561 0.837453121 0.996154736 0.00715 0.1217933129 0.1217933129 0.06199 0.2348135641 0.3010364601 500 50.5 4.02391 0.12353912092 0.12353912092 -1 0.12367372452 0.12367372452 0.00136 0.901367785 0.949912861 0.00178 0.976238781 0.976238781 0.10512 0.821751266 0.917064858 0.11746 0.882995254 0.962274667 0.00687 0.1500581675 0.1500581675 0.04545 0.586647965 0.797306219
6 750 50.5 3.97107 0.09585699718 0.09585699718 -1 0.09463671116 0.09463671116 0.00106 0.829576522 0.996509465 0.00179 0.6203066621 0.6203066621 0.09122 0.820908322 0.988956119 0.13158 0.849250091 0.997066922 0.00765 0.10393120662 0.10393120662 0.06512 0.1950222554 0.2482127769 750 50.5 4.01303 0.1039492242 0.1039492242 -1 0.10122631012 0.10122631012 0.00144 0.903545694 0.947406206 0.00235 0.982184029 0.982184029 0.12965 0.821035531 0.916146581 0.13098 0.878463364 0.957372071 0.00792 0.11926051548 0.11926051548 0.05085 0.560734835 0.763031534

View File

@ -3,66 +3,69 @@
#' @param X matrix of dim \eqn{n \times p t} with each row representing a
#' vectorized \eqn{p \times t} observation.
#' @param y vector of \eqn{n} elements as factors. (can be coersed to factors)
#' @param p,t,k,r dimensions.
#'
#' @returns a list with components
#' alpha: matrix of \eqn{t \times r}
#' beta: matrix of \eqn{p \times k}
#'
#' TODO: finish
#'
#' @export
LSIR <- function(X, y, p, t, k = 1L, r = 1L) {
# the code assumes:
# alpha: T x r, beta: p x k, X_i: p x T, for ith observation
# Check and transform parameters.
if (!is.matrix(X)) X <- as.matrix(X)
n <- nrow(X)
stopifnot(
ncol(X) == p * t,
n == length(y)
)
if (!is.factor(y)) y <- factor(y)
# Restructure X into a 3D tensor with axis (observations, predictors, time).
dim(X) <- c(n, p, t)
# Estimate predictor/time covariance matrices \hat{Sigma}_1, \hat{Sigma}_2.
sigma_p <- matrix(rowMeans(apply(X, 3, cov)), p, p)
sigma_t <- matrix(rowMeans(apply(X, 2, cov)), t, t)
# Normalize X as vec(Z) = Sigma^-1/2 (vec(X) - E(vec(X)))
dim(X) <- c(n, p * t)
sigma_p_isqrt <- matpow(sigma_p, -0.5)
sigma_t_isqrt <- matpow(sigma_t, -0.5)
Z <- scale(X, scale = FALSE) %*% kronecker(sigma_t_isqrt, sigma_p_isqrt)
# Both as 3D tensors.
dim(X) <- dim(Z) <- c(n, p, t)
# Estimate the conditional predictor/time covariance matrix Omega = cov(E(Z|Y)).
omega_p <- matrix(Reduce(`+`, lapply(levels(y), function(l) {
rowMeans(apply(Z[y == l, , ], 3, function(z) {
(nrow(z) / n) * tcrossprod(colMeans(z))
}))
})), p, p)
omega_t <- matrix(Reduce(`+`, lapply(levels(y), function(l) {
rowMeans(apply(Z[y == l, , ], 2, function(z) {
(nrow(z) / n) * tcrossprod(colMeans(z))
}))
})), t, t)
omega <- kronecker(omega_t, omega_p)
# Compute seperate SVD of estimated omega's and use that for an estimate of
# a central subspace basis.
svd_p <- La.svd(omega_p)
svd_t <- La.svd(omega_t)
beta <- sigma_p_isqrt %*% svd_p$u[, k]
alpha <- sigma_t_isqrt %*% svd_t$u[, r]
return(list(sigma_p = sigma_p, sigma_t = sigma_t,
sigma = kronecker(sigma_t, sigma_p),
alpha = alpha, beta = beta,
Delta = omega,
B = kronecker(alpha, beta)))
LSIR <- function(X, y,
reduction.dims = rep(1L, length(dim(X)) - 1L),
sample.axis = 1L,
nr.slices = 10L, # default slices, ignored if y is a factor or integer
eps = sqrt(.Machine$double.eps),
slice.method = c("cut", "ecdf") # ignored if y is a factor or integer
) {
# In case of `y` not descrete, group `y` into slices
if (!(is.factor(y) || is.integer(y))) {
slice.method <- match.arg(slice.method)
if (slice.method == "ecdf") {
y <- cut(ecdf(y)(y), nr.slices)
} else {
y <- cut(y, nr.slices)
# ensure there are no empty slices
if (any(table(y) == 0)) {
y <- as.factor(as.integer(y))
}
}
}
if (!is.factor(y)) {
y <- factor(y)
}
stopifnot(dim(X)[sample.axis] == length(y))
# rearrange `X` such that the first axis enumerates observations
axis.perm <- c(sample.axis, seq_along(dim(X))[-sample.axis])
X <- aperm(X, axis.perm)
modes <- seq_along(dim(X))[-1L]
n <- dim(X)[1L]
sigmas <- lapply(seq_along(modes), function(i) {
matrix(rowMeans(apply(X, modes[-i], cov)), dim(X)[modes[i]])
})
# Omega_i = Sigma_i^{-1 / 2}
isqrt_sigmas <- Map(matpow, sigmas, -1 / 2)
# Normalize observations
Z <- mlm(X - rep(colMeans(X), each = dim(X)[1L]), isqrt_sigmas, modes = modes)
# Estimate conditional covariances Omega = Cov(E[Z | Y])
slice.args <- c(
list(Z), rep(alist(, )[1], length(dim(X))), list(drop = FALSE)
)
omegas <- lapply(seq_along(modes), function(i) {
matrix(Reduce(`+`, lapply(levels(y), function(l) {
slice.args[[2]] <- y == l
rowMeans(apply(do.call(`[`, slice.args), modes[-i], function(z) {
(nrow(z) / n) * tcrossprod(colMeans(z))
}))
})), dim(X)[modes[i]])
})
# Compute central subspace basis estimate
betas <- mapply(function(isqrt_sigma, omega, reduction_dim) {
isqrt_sigma %*% La.svd(omega, reduction_dim, 0L)$u
}, isqrt_sigmas, omegas, reduction.dims, SIMPLIFY = FALSE)
list(betas = betas)
}

View File

@ -1,11 +1,14 @@
#' Tensor Sliced Inverse Regression
#'
#' @export
TSIR <- function(X, y, d, sample.axis = 1L,
TSIR <- function(X, y,
reduction.dims = rep(1L, length(dim(X)) - 1L),
sample.axis = 1L,
nr.slices = 10L, # default slices, ignored if y is a factor or integer
max.iter = 50L,
eps = sqrt(.Machine$double.eps),
slice.method = c("cut", "ecdf") # ignored if y is a factor or integer
cond.threshold = Inf, eps = sqrt(.Machine$double.eps),
slice.method = c("cut", "ecdf"), # ignored if y is a factor or integer
logger = NULL
) {
if (!(is.factor(y) || is.integer(y))) {
@ -23,7 +26,7 @@ TSIR <- function(X, y, d, sample.axis = 1L,
stopifnot(exprs = {
dim(X)[sample.axis] == length(y)
length(d) + 1L == length(dim(X))
length(reduction.dims) + 1L == length(dim(X))
})
# rearrange `X`, `Fy` such that the last axis enumerates observations
@ -60,7 +63,7 @@ TSIR <- function(X, y, d, sample.axis = 1L,
mcrossprod(rowMeans(X_s - c(mu_s), dims = length(modes)), mode = k)
}, slices, slice.means, slice.sizes)) / nr.slices
}, modes)
Gammas <- Map(`[[`, Map(La.svd, Sigmas, nu = d, nv = 0), list("u"))
Gammas <- Map(`[[`, Map(La.svd, Sigmas, nu = reduction.dims, nv = 0), list("u"))
# setup projections as `Proj_k = Gamma_k Gamma_k'`
projections <- Map(tcrossprod, Gammas)
@ -70,6 +73,12 @@ TSIR <- function(X, y, d, sample.axis = 1L,
n_s * sum((X_s - mlm(X_s, projections))^2)
}, slice.means, slice.sizes))
# invoke the logger
if (is.function(logger)) do.call(logger, list(
iter = 0L, Gammas = Gammas, Sigmas = Sigmas, loss = loss.last
))
# Iterate till convergence
for (iter in seq_len(max.iter)) {
# For each mode assume mode reduction `Gamma`s fixed and update
@ -86,7 +95,7 @@ TSIR <- function(X, y, d, sample.axis = 1L,
}, slice.proj, slice.sizes))
# Recompute mode `k` basis `Gamma_k`
Gammas[[k]] <- La.svd(Sigmas[[k]], nu = d[k], nv = 0)$u
Gammas[[k]] <- La.svd(Sigmas[[k]], nu = reduction.dims[k], nv = 0)$u
# update mode `k` projection matrix onto the new span of `Gamma_k`
projections[[k]] <- tcrossprod(Gammas[[k]])
@ -97,6 +106,11 @@ TSIR <- function(X, y, d, sample.axis = 1L,
}, slice.means, slice.sizes))
}
# invoke the logger
if (is.function(logger)) do.call(logger, list(
iter = iter, Gammas = Gammas, Sigmas = Sigmas, loss = loss
))
# check break condition
if (abs(loss - loss.last) < eps * loss) {
break
@ -107,6 +121,18 @@ TSIR <- function(X, y, d, sample.axis = 1L,
# mode sample covariance matrices
Omegas <- Map(function(k) n^-1 * mcrossprod(X, mode = k), modes)
# Check condition of sample covariances and perform regularization if needed
if (is.finite(cond.threshold)) {
for (j in seq_along(Omegas)) {
# Compute min and max eigen values
min_max <- range(eigen(Omegas[[j]], TRUE, TRUE)$values)
# The condition is approximately `kappa(Omegas[[j]]) > cond.threshold`
if (min_max[2] > cond.threshold * min_max[1]) {
Omegas[[j]] <- Omegas[[j]] + diag(0.2 * min_max[2], nrow(Omegas[[j]]))
}
}
}
# reductions matrices `Omega_k^-1 Gamma_k` where there (reverse) kronecker
# product spans the central tensor subspace (CTS) estimate
structure(Map(solve, Omegas, Gammas), mcov = Omegas, Gammas = Gammas)

View File

@ -47,7 +47,7 @@ gmlm_tensor_normal <- function(X, F, sample.axis = length(dim(X)),
Omegas <- Map(diag, dimX)
# Per mode covariance directions
# Note: (the directions are transposed!)
# Note: directions are transposed!
dirsX <- Map(function(Sigma) {
SVD <- La.svd(Sigma, nu = 0)
sqrt(SVD$d) * SVD$vt
@ -85,8 +85,8 @@ gmlm_tensor_normal <- function(X, F, sample.axis = length(dim(X)),
### Phase 2: (Block) Coordinate Descent
for (iter in seq_len(max.iter)) {
# update every beta (in random order)
for (j in sample.int(length(betas))) {
# update every beta
for (j in seq_along(betas)) {
FxB_j <- mlm(F, betas[-j], modes[-j])
FxSB_j <- mlm(FxB_j, Sigmas[-j], modes[-j])
betas[[j]] <- Omegas[[j]] %*% t(solve(mcrossprod(FxSB_j, FxB_j, j), mcrossprod(FxB_j, X, j)))