add: TSIR covariance regularization,

add: LSIR generalized to tensors
This commit is contained in:
Daniel Kapla 2025-05-02 11:53:03 +02:00
parent 13f85bdc16
commit 4bc10018e3
5 changed files with 123 additions and 70 deletions

View File

@ -1,4 +1,27 @@
\begin{tikzpicture}[scale = \tikzscale], line width = 1pt] \documentclass{standalone}
\usepackage[T1]{fontenc}
\usepackage{tikz}
\usepackage{amsmath,amssymb}
% matrices
\newcommand*{\mat}[1]{\boldsymbol{#1}}
% tensors (special case for lower case caligraphic letters)
\newcommand*{\ten}[1]{
\ifnum\pdfstrcmp{#1}{`}=1 % lowercase argument
\mathfrak{#1}
\else % uppercase argument
\mathcal{#1}
\fi
}
% matrix transpose
\renewcommand*{\t}[1]{{#1^{T}}}
% Expected Value
\newcommand{\E}{\operatorname{\mathbb{E}}}
\begin{document}
\begin{tikzpicture}[scale = 1.2, line width = 1pt]
\def\rect#1#2#3{ \def\rect#1#2#3{
\draw (0, 0, 0) -- (#1, 0, 0) -- (#1, #2, 0) -- (0, #2, 0) -- cycle; \draw (0, 0, 0) -- (#1, 0, 0) -- (#1, #2, 0) -- (0, #2, 0) -- cycle;
@ -28,9 +51,10 @@
\draw[fill = lightgray, fill opacity = 0.7] \draw[fill = lightgray, fill opacity = 0.7]
(0, 2.1, 0) -- (0, 2.1, -2) -- (0, 6.1, -2) -- (0, 6.1, 0) -- cycle; (0, 2.1, 0) -- (0, 2.1, -2) -- (0, 6.1, -2) -- (0, 6.1, 0) -- cycle;
\node[opacity = 0.7, cm={0.66, 0.66, 0, 1, (0, 0)}] \node[opacity = 0.7, cm={0.66, 0.66, 0, 1, (0, 0)}]
at (0, 4.1, -1.1) {$\t{\mat{\beta}_3}$}; at (0, 5.3, -1.1) {$\t{\mat{\beta}_3}$};
\draw[lightgray, line width = 0.7pt] (0, 5.1) arc (90:0:3); \draw[lightgray, line width = 0.7pt] (0, 5.1) arc (90:0:3);
\draw[fill = lightgray, fill opacity = 0.7] (0, 2.1) rectangle +(1.5, 3) \draw[fill = lightgray, fill opacity = 0.7] (0, 2.1) rectangle +(1.5, 3)
node [pos = 0.5] {$\t{\mat{\beta}_2}$}; node [pos = 0.5] {$\t{\mat{\beta}_2}$};
\end{tikzpicture} \end{tikzpicture}
\end{document}

View File

@ -1,6 +1,6 @@
sample.size rep time.gmlm dist.subspace.gmlm dist.projection.gmlm time.tnormal dist.subspace.tnormal dist.projection.tnormal time.pca dist.subspace.pca dist.projection.pca time.hopca dist.subspace.hopca dist.projection.hopca time.lpca dist.subspace.lpca dist.projection.lpca time.clpca dist.subspace.clpca dist.projection.clpca time.tsir dist.subspace.tsir dist.projection.tsir time.mgcca dist.subspace.mgcca dist.projection.mgcca sample.size rep time.gmlm dist.subspace.gmlm dist.projection.gmlm time.tnormal dist.subspace.tnormal dist.projection.tnormal time.pca dist.subspace.pca dist.projection.pca time.hopca dist.subspace.hopca dist.projection.hopca time.lpca dist.subspace.lpca dist.projection.lpca time.clpca dist.subspace.clpca dist.projection.clpca time.tsir dist.subspace.tsir dist.projection.tsir time.mgcca dist.subspace.mgcca dist.projection.mgcca
100 50.5 3.03076 0.3036349361 0.3036349361 -1 0.2868643362 0.2868643362 0.00073 0.818177689 0.984435713 0.00063 0.755946643 0.755946643 0.05648 0.819181741 0.953681387 0.0867 0.822312566 0.986781953 0.00613 0.4019671947 0.4019671947 0.04573 0.588313518 0.75377487 100 50.5 4.78067 0.2687172315 0.2687172315 -1 0.2740385439 0.2740385439 0.00142 0.879931169 0.955995509 0.00105 0.927846756 0.927846756 0.08114 0.823009087 0.924925441 0.12154 0.856587662 0.952690667 0.00723 0.4439691501 0.4439691501 0.05715 0.686201489 0.888446221
200 50.5 3.10805 0.1992945381 0.1992945381 -1 0.2020230182 0.2020230182 0.00074 0.814338816 0.987890451 0.00093 0.690811228 0.690811228 0.05801 0.81457008 0.961377759 0.08991 0.825394861 0.991647771 0.00599 0.2511707246 0.2511707246 0.04656 0.476761868 0.629632653 200 50.5 4.89815 0.2088401765 0.2088401765 -1 0.2130251361 0.2130251361 0.00154 0.892216118 0.956475415 0.00147 0.955911213 0.955911213 0.08683 0.821679399 0.918999512 0.12544 0.867839167 0.956083632 0.00747 0.2895647172 0.2895647172 0.05528 0.618966058 0.818845212
300 50.5 3.3052 0.1546113425 0.1546113425 -1 0.1537117318 0.1537117318 0.00075 0.798497064 0.993693989 0.00099 0.729634976 0.729634976 0.05696 0.820224046 0.982252313 0.09375 0.806834524 0.992228985 0.00636 0.1790215654 0.1790215654 0.04835 0.37049092 0.4917621054 300 50.5 4.61123 0.1607295666 0.1607295666 -1 0.1669095997 0.1669095997 0.0015 0.895647105 0.951015895 0.00166 0.96455755 0.96455755 0.09651 0.823301202 0.926266165 0.12515 0.866042789 0.954992902 0.00736 0.1983123012 0.1983123012 0.04981 0.584343635 0.780637806
500 50.5 3.88258 0.1130383355 0.1130383355 -1 0.11118393534 0.11118393534 0.00095 0.817445534 0.996960022 0.00148 0.651972301 0.651972301 0.07627 0.816560225 0.989645322 0.11561 0.837453121 0.996154736 0.00715 0.1217933129 0.1217933129 0.06199 0.2348135641 0.3010364601 500 50.5 4.02391 0.12353912092 0.12353912092 -1 0.12367372452 0.12367372452 0.00136 0.901367785 0.949912861 0.00178 0.976238781 0.976238781 0.10512 0.821751266 0.917064858 0.11746 0.882995254 0.962274667 0.00687 0.1500581675 0.1500581675 0.04545 0.586647965 0.797306219
750 50.5 3.97107 0.09585699718 0.09585699718 -1 0.09463671116 0.09463671116 0.00106 0.829576522 0.996509465 0.00179 0.6203066621 0.6203066621 0.09122 0.820908322 0.988956119 0.13158 0.849250091 0.997066922 0.00765 0.10393120662 0.10393120662 0.06512 0.1950222554 0.2482127769 750 50.5 4.01303 0.1039492242 0.1039492242 -1 0.10122631012 0.10122631012 0.00144 0.903545694 0.947406206 0.00235 0.982184029 0.982184029 0.12965 0.821035531 0.916146581 0.13098 0.878463364 0.957372071 0.00792 0.11926051548 0.11926051548 0.05085 0.560734835 0.763031534

1 sample.size rep time.gmlm dist.subspace.gmlm dist.projection.gmlm time.tnormal dist.subspace.tnormal dist.projection.tnormal time.pca dist.subspace.pca dist.projection.pca time.hopca dist.subspace.hopca dist.projection.hopca time.lpca dist.subspace.lpca dist.projection.lpca time.clpca dist.subspace.clpca dist.projection.clpca time.tsir dist.subspace.tsir dist.projection.tsir time.mgcca dist.subspace.mgcca dist.projection.mgcca
2 100 50.5 3.03076 0.3036349361 0.3036349361 -1 0.2868643362 0.2868643362 0.00073 0.818177689 0.984435713 0.00063 0.755946643 0.755946643 0.05648 0.819181741 0.953681387 0.0867 0.822312566 0.986781953 0.00613 0.4019671947 0.4019671947 0.04573 0.588313518 0.75377487 100 50.5 4.78067 0.2687172315 0.2687172315 -1 0.2740385439 0.2740385439 0.00142 0.879931169 0.955995509 0.00105 0.927846756 0.927846756 0.08114 0.823009087 0.924925441 0.12154 0.856587662 0.952690667 0.00723 0.4439691501 0.4439691501 0.05715 0.686201489 0.888446221
3 200 50.5 3.10805 0.1992945381 0.1992945381 -1 0.2020230182 0.2020230182 0.00074 0.814338816 0.987890451 0.00093 0.690811228 0.690811228 0.05801 0.81457008 0.961377759 0.08991 0.825394861 0.991647771 0.00599 0.2511707246 0.2511707246 0.04656 0.476761868 0.629632653 200 50.5 4.89815 0.2088401765 0.2088401765 -1 0.2130251361 0.2130251361 0.00154 0.892216118 0.956475415 0.00147 0.955911213 0.955911213 0.08683 0.821679399 0.918999512 0.12544 0.867839167 0.956083632 0.00747 0.2895647172 0.2895647172 0.05528 0.618966058 0.818845212
4 300 50.5 3.3052 0.1546113425 0.1546113425 -1 0.1537117318 0.1537117318 0.00075 0.798497064 0.993693989 0.00099 0.729634976 0.729634976 0.05696 0.820224046 0.982252313 0.09375 0.806834524 0.992228985 0.00636 0.1790215654 0.1790215654 0.04835 0.37049092 0.4917621054 300 50.5 4.61123 0.1607295666 0.1607295666 -1 0.1669095997 0.1669095997 0.0015 0.895647105 0.951015895 0.00166 0.96455755 0.96455755 0.09651 0.823301202 0.926266165 0.12515 0.866042789 0.954992902 0.00736 0.1983123012 0.1983123012 0.04981 0.584343635 0.780637806
5 500 50.5 3.88258 0.1130383355 0.1130383355 -1 0.11118393534 0.11118393534 0.00095 0.817445534 0.996960022 0.00148 0.651972301 0.651972301 0.07627 0.816560225 0.989645322 0.11561 0.837453121 0.996154736 0.00715 0.1217933129 0.1217933129 0.06199 0.2348135641 0.3010364601 500 50.5 4.02391 0.12353912092 0.12353912092 -1 0.12367372452 0.12367372452 0.00136 0.901367785 0.949912861 0.00178 0.976238781 0.976238781 0.10512 0.821751266 0.917064858 0.11746 0.882995254 0.962274667 0.00687 0.1500581675 0.1500581675 0.04545 0.586647965 0.797306219
6 750 50.5 3.97107 0.09585699718 0.09585699718 -1 0.09463671116 0.09463671116 0.00106 0.829576522 0.996509465 0.00179 0.6203066621 0.6203066621 0.09122 0.820908322 0.988956119 0.13158 0.849250091 0.997066922 0.00765 0.10393120662 0.10393120662 0.06512 0.1950222554 0.2482127769 750 50.5 4.01303 0.1039492242 0.1039492242 -1 0.10122631012 0.10122631012 0.00144 0.903545694 0.947406206 0.00235 0.982184029 0.982184029 0.12965 0.821035531 0.916146581 0.13098 0.878463364 0.957372071 0.00792 0.11926051548 0.11926051548 0.05085 0.560734835 0.763031534

View File

@ -3,66 +3,69 @@
#' @param X matrix of dim \eqn{n \times p t} with each row representing a #' @param X matrix of dim \eqn{n \times p t} with each row representing a
#' vectorized \eqn{p \times t} observation. #' vectorized \eqn{p \times t} observation.
#' @param y vector of \eqn{n} elements as factors. (can be coersed to factors) #' @param y vector of \eqn{n} elements as factors. (can be coersed to factors)
#' @param p,t,k,r dimensions.
#'
#' @returns a list with components
#' alpha: matrix of \eqn{t \times r}
#' beta: matrix of \eqn{p \times k}
#' #'
#' TODO: finish #' TODO: finish
#' #'
#' @export #' @export
LSIR <- function(X, y, p, t, k = 1L, r = 1L) { LSIR <- function(X, y,
# the code assumes: reduction.dims = rep(1L, length(dim(X)) - 1L),
# alpha: T x r, beta: p x k, X_i: p x T, for ith observation sample.axis = 1L,
nr.slices = 10L, # default slices, ignored if y is a factor or integer
eps = sqrt(.Machine$double.eps),
slice.method = c("cut", "ecdf") # ignored if y is a factor or integer
) {
# In case of `y` not descrete, group `y` into slices
if (!(is.factor(y) || is.integer(y))) {
slice.method <- match.arg(slice.method)
if (slice.method == "ecdf") {
y <- cut(ecdf(y)(y), nr.slices)
} else {
y <- cut(y, nr.slices)
# ensure there are no empty slices
if (any(table(y) == 0)) {
y <- as.factor(as.integer(y))
}
}
}
if (!is.factor(y)) {
y <- factor(y)
}
# Check and transform parameters. stopifnot(dim(X)[sample.axis] == length(y))
if (!is.matrix(X)) X <- as.matrix(X)
n <- nrow(X) # rearrange `X` such that the first axis enumerates observations
stopifnot( axis.perm <- c(sample.axis, seq_along(dim(X))[-sample.axis])
ncol(X) == p * t, X <- aperm(X, axis.perm)
n == length(y)
modes <- seq_along(dim(X))[-1L]
n <- dim(X)[1L]
sigmas <- lapply(seq_along(modes), function(i) {
matrix(rowMeans(apply(X, modes[-i], cov)), dim(X)[modes[i]])
})
# Omega_i = Sigma_i^{-1 / 2}
isqrt_sigmas <- Map(matpow, sigmas, -1 / 2)
# Normalize observations
Z <- mlm(X - rep(colMeans(X), each = dim(X)[1L]), isqrt_sigmas, modes = modes)
# Estimate conditional covariances Omega = Cov(E[Z | Y])
slice.args <- c(
list(Z), rep(alist(, )[1], length(dim(X))), list(drop = FALSE)
) )
if (!is.factor(y)) y <- factor(y) omegas <- lapply(seq_along(modes), function(i) {
matrix(Reduce(`+`, lapply(levels(y), function(l) {
slice.args[[2]] <- y == l
rowMeans(apply(do.call(`[`, slice.args), modes[-i], function(z) {
(nrow(z) / n) * tcrossprod(colMeans(z))
}))
})), dim(X)[modes[i]])
})
# Restructure X into a 3D tensor with axis (observations, predictors, time). # Compute central subspace basis estimate
dim(X) <- c(n, p, t) betas <- mapply(function(isqrt_sigma, omega, reduction_dim) {
isqrt_sigma %*% La.svd(omega, reduction_dim, 0L)$u
}, isqrt_sigmas, omegas, reduction.dims, SIMPLIFY = FALSE)
# Estimate predictor/time covariance matrices \hat{Sigma}_1, \hat{Sigma}_2. list(betas = betas)
sigma_p <- matrix(rowMeans(apply(X, 3, cov)), p, p)
sigma_t <- matrix(rowMeans(apply(X, 2, cov)), t, t)
# Normalize X as vec(Z) = Sigma^-1/2 (vec(X) - E(vec(X)))
dim(X) <- c(n, p * t)
sigma_p_isqrt <- matpow(sigma_p, -0.5)
sigma_t_isqrt <- matpow(sigma_t, -0.5)
Z <- scale(X, scale = FALSE) %*% kronecker(sigma_t_isqrt, sigma_p_isqrt)
# Both as 3D tensors.
dim(X) <- dim(Z) <- c(n, p, t)
# Estimate the conditional predictor/time covariance matrix Omega = cov(E(Z|Y)).
omega_p <- matrix(Reduce(`+`, lapply(levels(y), function(l) {
rowMeans(apply(Z[y == l, , ], 3, function(z) {
(nrow(z) / n) * tcrossprod(colMeans(z))
}))
})), p, p)
omega_t <- matrix(Reduce(`+`, lapply(levels(y), function(l) {
rowMeans(apply(Z[y == l, , ], 2, function(z) {
(nrow(z) / n) * tcrossprod(colMeans(z))
}))
})), t, t)
omega <- kronecker(omega_t, omega_p)
# Compute seperate SVD of estimated omega's and use that for an estimate of
# a central subspace basis.
svd_p <- La.svd(omega_p)
svd_t <- La.svd(omega_t)
beta <- sigma_p_isqrt %*% svd_p$u[, k]
alpha <- sigma_t_isqrt %*% svd_t$u[, r]
return(list(sigma_p = sigma_p, sigma_t = sigma_t,
sigma = kronecker(sigma_t, sigma_p),
alpha = alpha, beta = beta,
Delta = omega,
B = kronecker(alpha, beta)))
} }

View File

@ -1,11 +1,14 @@
#' Tensor Sliced Inverse Regression #' Tensor Sliced Inverse Regression
#' #'
#' @export #' @export
TSIR <- function(X, y, d, sample.axis = 1L, TSIR <- function(X, y,
reduction.dims = rep(1L, length(dim(X)) - 1L),
sample.axis = 1L,
nr.slices = 10L, # default slices, ignored if y is a factor or integer nr.slices = 10L, # default slices, ignored if y is a factor or integer
max.iter = 50L, max.iter = 50L,
eps = sqrt(.Machine$double.eps), cond.threshold = Inf, eps = sqrt(.Machine$double.eps),
slice.method = c("cut", "ecdf") # ignored if y is a factor or integer slice.method = c("cut", "ecdf"), # ignored if y is a factor or integer
logger = NULL
) { ) {
if (!(is.factor(y) || is.integer(y))) { if (!(is.factor(y) || is.integer(y))) {
@ -23,7 +26,7 @@ TSIR <- function(X, y, d, sample.axis = 1L,
stopifnot(exprs = { stopifnot(exprs = {
dim(X)[sample.axis] == length(y) dim(X)[sample.axis] == length(y)
length(d) + 1L == length(dim(X)) length(reduction.dims) + 1L == length(dim(X))
}) })
# rearrange `X`, `Fy` such that the last axis enumerates observations # rearrange `X`, `Fy` such that the last axis enumerates observations
@ -60,7 +63,7 @@ TSIR <- function(X, y, d, sample.axis = 1L,
mcrossprod(rowMeans(X_s - c(mu_s), dims = length(modes)), mode = k) mcrossprod(rowMeans(X_s - c(mu_s), dims = length(modes)), mode = k)
}, slices, slice.means, slice.sizes)) / nr.slices }, slices, slice.means, slice.sizes)) / nr.slices
}, modes) }, modes)
Gammas <- Map(`[[`, Map(La.svd, Sigmas, nu = d, nv = 0), list("u")) Gammas <- Map(`[[`, Map(La.svd, Sigmas, nu = reduction.dims, nv = 0), list("u"))
# setup projections as `Proj_k = Gamma_k Gamma_k'` # setup projections as `Proj_k = Gamma_k Gamma_k'`
projections <- Map(tcrossprod, Gammas) projections <- Map(tcrossprod, Gammas)
@ -70,6 +73,12 @@ TSIR <- function(X, y, d, sample.axis = 1L,
n_s * sum((X_s - mlm(X_s, projections))^2) n_s * sum((X_s - mlm(X_s, projections))^2)
}, slice.means, slice.sizes)) }, slice.means, slice.sizes))
# invoke the logger
if (is.function(logger)) do.call(logger, list(
iter = 0L, Gammas = Gammas, Sigmas = Sigmas, loss = loss.last
))
# Iterate till convergence # Iterate till convergence
for (iter in seq_len(max.iter)) { for (iter in seq_len(max.iter)) {
# For each mode assume mode reduction `Gamma`s fixed and update # For each mode assume mode reduction `Gamma`s fixed and update
@ -86,7 +95,7 @@ TSIR <- function(X, y, d, sample.axis = 1L,
}, slice.proj, slice.sizes)) }, slice.proj, slice.sizes))
# Recompute mode `k` basis `Gamma_k` # Recompute mode `k` basis `Gamma_k`
Gammas[[k]] <- La.svd(Sigmas[[k]], nu = d[k], nv = 0)$u Gammas[[k]] <- La.svd(Sigmas[[k]], nu = reduction.dims[k], nv = 0)$u
# update mode `k` projection matrix onto the new span of `Gamma_k` # update mode `k` projection matrix onto the new span of `Gamma_k`
projections[[k]] <- tcrossprod(Gammas[[k]]) projections[[k]] <- tcrossprod(Gammas[[k]])
@ -97,6 +106,11 @@ TSIR <- function(X, y, d, sample.axis = 1L,
}, slice.means, slice.sizes)) }, slice.means, slice.sizes))
} }
# invoke the logger
if (is.function(logger)) do.call(logger, list(
iter = iter, Gammas = Gammas, Sigmas = Sigmas, loss = loss
))
# check break condition # check break condition
if (abs(loss - loss.last) < eps * loss) { if (abs(loss - loss.last) < eps * loss) {
break break
@ -107,6 +121,18 @@ TSIR <- function(X, y, d, sample.axis = 1L,
# mode sample covariance matrices # mode sample covariance matrices
Omegas <- Map(function(k) n^-1 * mcrossprod(X, mode = k), modes) Omegas <- Map(function(k) n^-1 * mcrossprod(X, mode = k), modes)
# Check condition of sample covariances and perform regularization if needed
if (is.finite(cond.threshold)) {
for (j in seq_along(Omegas)) {
# Compute min and max eigen values
min_max <- range(eigen(Omegas[[j]], TRUE, TRUE)$values)
# The condition is approximately `kappa(Omegas[[j]]) > cond.threshold`
if (min_max[2] > cond.threshold * min_max[1]) {
Omegas[[j]] <- Omegas[[j]] + diag(0.2 * min_max[2], nrow(Omegas[[j]]))
}
}
}
# reductions matrices `Omega_k^-1 Gamma_k` where there (reverse) kronecker # reductions matrices `Omega_k^-1 Gamma_k` where there (reverse) kronecker
# product spans the central tensor subspace (CTS) estimate # product spans the central tensor subspace (CTS) estimate
structure(Map(solve, Omegas, Gammas), mcov = Omegas, Gammas = Gammas) structure(Map(solve, Omegas, Gammas), mcov = Omegas, Gammas = Gammas)

View File

@ -47,7 +47,7 @@ gmlm_tensor_normal <- function(X, F, sample.axis = length(dim(X)),
Omegas <- Map(diag, dimX) Omegas <- Map(diag, dimX)
# Per mode covariance directions # Per mode covariance directions
# Note: (the directions are transposed!) # Note: directions are transposed!
dirsX <- Map(function(Sigma) { dirsX <- Map(function(Sigma) {
SVD <- La.svd(Sigma, nu = 0) SVD <- La.svd(Sigma, nu = 0)
sqrt(SVD$d) * SVD$vt sqrt(SVD$d) * SVD$vt
@ -85,8 +85,8 @@ gmlm_tensor_normal <- function(X, F, sample.axis = length(dim(X)),
### Phase 2: (Block) Coordinate Descent ### Phase 2: (Block) Coordinate Descent
for (iter in seq_len(max.iter)) { for (iter in seq_len(max.iter)) {
# update every beta (in random order) # update every beta
for (j in sample.int(length(betas))) { for (j in seq_along(betas)) {
FxB_j <- mlm(F, betas[-j], modes[-j]) FxB_j <- mlm(F, betas[-j], modes[-j])
FxSB_j <- mlm(FxB_j, Sigmas[-j], modes[-j]) FxSB_j <- mlm(FxB_j, Sigmas[-j], modes[-j])
betas[[j]] <- Omegas[[j]] %*% t(solve(mcrossprod(FxSB_j, FxB_j, j), mcrossprod(FxB_j, X, j))) betas[[j]] <- Omegas[[j]] %*% t(solve(mcrossprod(FxSB_j, FxB_j, j), mcrossprod(FxB_j, X, j)))