add: TSIR covariance regularization,
add: LSIR generalized to tensors
This commit is contained in:
parent
13f85bdc16
commit
4bc10018e3
|
@ -1,4 +1,27 @@
|
|||
\begin{tikzpicture}[scale = \tikzscale], line width = 1pt]
|
||||
\documentclass{standalone}
|
||||
|
||||
\usepackage[T1]{fontenc}
|
||||
\usepackage{tikz}
|
||||
\usepackage{amsmath,amssymb}
|
||||
|
||||
% matrices
|
||||
\newcommand*{\mat}[1]{\boldsymbol{#1}}
|
||||
% tensors (special case for lower case caligraphic letters)
|
||||
\newcommand*{\ten}[1]{
|
||||
\ifnum\pdfstrcmp{#1}{`}=1 % lowercase argument
|
||||
\mathfrak{#1}
|
||||
\else % uppercase argument
|
||||
\mathcal{#1}
|
||||
\fi
|
||||
}
|
||||
% matrix transpose
|
||||
\renewcommand*{\t}[1]{{#1^{T}}}
|
||||
% Expected Value
|
||||
\newcommand{\E}{\operatorname{\mathbb{E}}}
|
||||
|
||||
|
||||
\begin{document}
|
||||
\begin{tikzpicture}[scale = 1.2, line width = 1pt]
|
||||
|
||||
\def\rect#1#2#3{
|
||||
\draw (0, 0, 0) -- (#1, 0, 0) -- (#1, #2, 0) -- (0, #2, 0) -- cycle;
|
||||
|
@ -28,9 +51,10 @@
|
|||
\draw[fill = lightgray, fill opacity = 0.7]
|
||||
(0, 2.1, 0) -- (0, 2.1, -2) -- (0, 6.1, -2) -- (0, 6.1, 0) -- cycle;
|
||||
\node[opacity = 0.7, cm={0.66, 0.66, 0, 1, (0, 0)}]
|
||||
at (0, 4.1, -1.1) {$\t{\mat{\beta}_3}$};
|
||||
at (0, 5.3, -1.1) {$\t{\mat{\beta}_3}$};
|
||||
|
||||
\draw[lightgray, line width = 0.7pt] (0, 5.1) arc (90:0:3);
|
||||
\draw[fill = lightgray, fill opacity = 0.7] (0, 2.1) rectangle +(1.5, 3)
|
||||
node [pos = 0.5] {$\t{\mat{\beta}_2}$};
|
||||
\end{tikzpicture}
|
||||
\end{document}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
sample.size rep time.gmlm dist.subspace.gmlm dist.projection.gmlm time.tnormal dist.subspace.tnormal dist.projection.tnormal time.pca dist.subspace.pca dist.projection.pca time.hopca dist.subspace.hopca dist.projection.hopca time.lpca dist.subspace.lpca dist.projection.lpca time.clpca dist.subspace.clpca dist.projection.clpca time.tsir dist.subspace.tsir dist.projection.tsir time.mgcca dist.subspace.mgcca dist.projection.mgcca
|
||||
100 50.5 3.03076 0.3036349361 0.3036349361 -1 0.2868643362 0.2868643362 0.00073 0.818177689 0.984435713 0.00063 0.755946643 0.755946643 0.05648 0.819181741 0.953681387 0.0867 0.822312566 0.986781953 0.00613 0.4019671947 0.4019671947 0.04573 0.588313518 0.75377487
|
||||
200 50.5 3.10805 0.1992945381 0.1992945381 -1 0.2020230182 0.2020230182 0.00074 0.814338816 0.987890451 0.00093 0.690811228 0.690811228 0.05801 0.81457008 0.961377759 0.08991 0.825394861 0.991647771 0.00599 0.2511707246 0.2511707246 0.04656 0.476761868 0.629632653
|
||||
300 50.5 3.3052 0.1546113425 0.1546113425 -1 0.1537117318 0.1537117318 0.00075 0.798497064 0.993693989 0.00099 0.729634976 0.729634976 0.05696 0.820224046 0.982252313 0.09375 0.806834524 0.992228985 0.00636 0.1790215654 0.1790215654 0.04835 0.37049092 0.4917621054
|
||||
500 50.5 3.88258 0.1130383355 0.1130383355 -1 0.11118393534 0.11118393534 0.00095 0.817445534 0.996960022 0.00148 0.651972301 0.651972301 0.07627 0.816560225 0.989645322 0.11561 0.837453121 0.996154736 0.00715 0.1217933129 0.1217933129 0.06199 0.2348135641 0.3010364601
|
||||
750 50.5 3.97107 0.09585699718 0.09585699718 -1 0.09463671116 0.09463671116 0.00106 0.829576522 0.996509465 0.00179 0.6203066621 0.6203066621 0.09122 0.820908322 0.988956119 0.13158 0.849250091 0.997066922 0.00765 0.10393120662 0.10393120662 0.06512 0.1950222554 0.2482127769
|
||||
100 50.5 4.78067 0.2687172315 0.2687172315 -1 0.2740385439 0.2740385439 0.00142 0.879931169 0.955995509 0.00105 0.927846756 0.927846756 0.08114 0.823009087 0.924925441 0.12154 0.856587662 0.952690667 0.00723 0.4439691501 0.4439691501 0.05715 0.686201489 0.888446221
|
||||
200 50.5 4.89815 0.2088401765 0.2088401765 -1 0.2130251361 0.2130251361 0.00154 0.892216118 0.956475415 0.00147 0.955911213 0.955911213 0.08683 0.821679399 0.918999512 0.12544 0.867839167 0.956083632 0.00747 0.2895647172 0.2895647172 0.05528 0.618966058 0.818845212
|
||||
300 50.5 4.61123 0.1607295666 0.1607295666 -1 0.1669095997 0.1669095997 0.0015 0.895647105 0.951015895 0.00166 0.96455755 0.96455755 0.09651 0.823301202 0.926266165 0.12515 0.866042789 0.954992902 0.00736 0.1983123012 0.1983123012 0.04981 0.584343635 0.780637806
|
||||
500 50.5 4.02391 0.12353912092 0.12353912092 -1 0.12367372452 0.12367372452 0.00136 0.901367785 0.949912861 0.00178 0.976238781 0.976238781 0.10512 0.821751266 0.917064858 0.11746 0.882995254 0.962274667 0.00687 0.1500581675 0.1500581675 0.04545 0.586647965 0.797306219
|
||||
750 50.5 4.01303 0.1039492242 0.1039492242 -1 0.10122631012 0.10122631012 0.00144 0.903545694 0.947406206 0.00235 0.982184029 0.982184029 0.12965 0.821035531 0.916146581 0.13098 0.878463364 0.957372071 0.00792 0.11926051548 0.11926051548 0.05085 0.560734835 0.763031534
|
||||
|
|
|
|
@ -3,66 +3,69 @@
|
|||
#' @param X matrix of dim \eqn{n \times p t} with each row representing a
|
||||
#' vectorized \eqn{p \times t} observation.
|
||||
#' @param y vector of \eqn{n} elements as factors. (can be coersed to factors)
|
||||
#' @param p,t,k,r dimensions.
|
||||
#'
|
||||
#' @returns a list with components
|
||||
#' alpha: matrix of \eqn{t \times r}
|
||||
#' beta: matrix of \eqn{p \times k}
|
||||
#'
|
||||
#' TODO: finish
|
||||
#'
|
||||
#' @export
|
||||
LSIR <- function(X, y, p, t, k = 1L, r = 1L) {
|
||||
# the code assumes:
|
||||
# alpha: T x r, beta: p x k, X_i: p x T, for ith observation
|
||||
LSIR <- function(X, y,
|
||||
reduction.dims = rep(1L, length(dim(X)) - 1L),
|
||||
sample.axis = 1L,
|
||||
nr.slices = 10L, # default slices, ignored if y is a factor or integer
|
||||
eps = sqrt(.Machine$double.eps),
|
||||
slice.method = c("cut", "ecdf") # ignored if y is a factor or integer
|
||||
) {
|
||||
# In case of `y` not descrete, group `y` into slices
|
||||
if (!(is.factor(y) || is.integer(y))) {
|
||||
slice.method <- match.arg(slice.method)
|
||||
if (slice.method == "ecdf") {
|
||||
y <- cut(ecdf(y)(y), nr.slices)
|
||||
} else {
|
||||
y <- cut(y, nr.slices)
|
||||
# ensure there are no empty slices
|
||||
if (any(table(y) == 0)) {
|
||||
y <- as.factor(as.integer(y))
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!is.factor(y)) {
|
||||
y <- factor(y)
|
||||
}
|
||||
|
||||
# Check and transform parameters.
|
||||
if (!is.matrix(X)) X <- as.matrix(X)
|
||||
n <- nrow(X)
|
||||
stopifnot(
|
||||
ncol(X) == p * t,
|
||||
n == length(y)
|
||||
stopifnot(dim(X)[sample.axis] == length(y))
|
||||
|
||||
# rearrange `X` such that the first axis enumerates observations
|
||||
axis.perm <- c(sample.axis, seq_along(dim(X))[-sample.axis])
|
||||
X <- aperm(X, axis.perm)
|
||||
|
||||
modes <- seq_along(dim(X))[-1L]
|
||||
n <- dim(X)[1L]
|
||||
|
||||
sigmas <- lapply(seq_along(modes), function(i) {
|
||||
matrix(rowMeans(apply(X, modes[-i], cov)), dim(X)[modes[i]])
|
||||
})
|
||||
# Omega_i = Sigma_i^{-1 / 2}
|
||||
isqrt_sigmas <- Map(matpow, sigmas, -1 / 2)
|
||||
|
||||
# Normalize observations
|
||||
Z <- mlm(X - rep(colMeans(X), each = dim(X)[1L]), isqrt_sigmas, modes = modes)
|
||||
|
||||
# Estimate conditional covariances Omega = Cov(E[Z | Y])
|
||||
slice.args <- c(
|
||||
list(Z), rep(alist(, )[1], length(dim(X))), list(drop = FALSE)
|
||||
)
|
||||
if (!is.factor(y)) y <- factor(y)
|
||||
omegas <- lapply(seq_along(modes), function(i) {
|
||||
matrix(Reduce(`+`, lapply(levels(y), function(l) {
|
||||
slice.args[[2]] <- y == l
|
||||
rowMeans(apply(do.call(`[`, slice.args), modes[-i], function(z) {
|
||||
(nrow(z) / n) * tcrossprod(colMeans(z))
|
||||
}))
|
||||
})), dim(X)[modes[i]])
|
||||
})
|
||||
|
||||
# Restructure X into a 3D tensor with axis (observations, predictors, time).
|
||||
dim(X) <- c(n, p, t)
|
||||
# Compute central subspace basis estimate
|
||||
betas <- mapply(function(isqrt_sigma, omega, reduction_dim) {
|
||||
isqrt_sigma %*% La.svd(omega, reduction_dim, 0L)$u
|
||||
}, isqrt_sigmas, omegas, reduction.dims, SIMPLIFY = FALSE)
|
||||
|
||||
# Estimate predictor/time covariance matrices \hat{Sigma}_1, \hat{Sigma}_2.
|
||||
sigma_p <- matrix(rowMeans(apply(X, 3, cov)), p, p)
|
||||
sigma_t <- matrix(rowMeans(apply(X, 2, cov)), t, t)
|
||||
|
||||
# Normalize X as vec(Z) = Sigma^-1/2 (vec(X) - E(vec(X)))
|
||||
dim(X) <- c(n, p * t)
|
||||
sigma_p_isqrt <- matpow(sigma_p, -0.5)
|
||||
sigma_t_isqrt <- matpow(sigma_t, -0.5)
|
||||
Z <- scale(X, scale = FALSE) %*% kronecker(sigma_t_isqrt, sigma_p_isqrt)
|
||||
# Both as 3D tensors.
|
||||
dim(X) <- dim(Z) <- c(n, p, t)
|
||||
|
||||
# Estimate the conditional predictor/time covariance matrix Omega = cov(E(Z|Y)).
|
||||
omega_p <- matrix(Reduce(`+`, lapply(levels(y), function(l) {
|
||||
rowMeans(apply(Z[y == l, , ], 3, function(z) {
|
||||
(nrow(z) / n) * tcrossprod(colMeans(z))
|
||||
}))
|
||||
})), p, p)
|
||||
omega_t <- matrix(Reduce(`+`, lapply(levels(y), function(l) {
|
||||
rowMeans(apply(Z[y == l, , ], 2, function(z) {
|
||||
(nrow(z) / n) * tcrossprod(colMeans(z))
|
||||
}))
|
||||
})), t, t)
|
||||
omega <- kronecker(omega_t, omega_p)
|
||||
|
||||
# Compute seperate SVD of estimated omega's and use that for an estimate of
|
||||
# a central subspace basis.
|
||||
svd_p <- La.svd(omega_p)
|
||||
svd_t <- La.svd(omega_t)
|
||||
beta <- sigma_p_isqrt %*% svd_p$u[, k]
|
||||
alpha <- sigma_t_isqrt %*% svd_t$u[, r]
|
||||
|
||||
return(list(sigma_p = sigma_p, sigma_t = sigma_t,
|
||||
sigma = kronecker(sigma_t, sigma_p),
|
||||
alpha = alpha, beta = beta,
|
||||
Delta = omega,
|
||||
B = kronecker(alpha, beta)))
|
||||
list(betas = betas)
|
||||
}
|
||||
|
|
|
@ -1,11 +1,14 @@
|
|||
#' Tensor Sliced Inverse Regression
|
||||
#'
|
||||
#' @export
|
||||
TSIR <- function(X, y, d, sample.axis = 1L,
|
||||
TSIR <- function(X, y,
|
||||
reduction.dims = rep(1L, length(dim(X)) - 1L),
|
||||
sample.axis = 1L,
|
||||
nr.slices = 10L, # default slices, ignored if y is a factor or integer
|
||||
max.iter = 50L,
|
||||
eps = sqrt(.Machine$double.eps),
|
||||
slice.method = c("cut", "ecdf") # ignored if y is a factor or integer
|
||||
cond.threshold = Inf, eps = sqrt(.Machine$double.eps),
|
||||
slice.method = c("cut", "ecdf"), # ignored if y is a factor or integer
|
||||
logger = NULL
|
||||
) {
|
||||
|
||||
if (!(is.factor(y) || is.integer(y))) {
|
||||
|
@ -23,7 +26,7 @@ TSIR <- function(X, y, d, sample.axis = 1L,
|
|||
|
||||
stopifnot(exprs = {
|
||||
dim(X)[sample.axis] == length(y)
|
||||
length(d) + 1L == length(dim(X))
|
||||
length(reduction.dims) + 1L == length(dim(X))
|
||||
})
|
||||
|
||||
# rearrange `X`, `Fy` such that the last axis enumerates observations
|
||||
|
@ -60,7 +63,7 @@ TSIR <- function(X, y, d, sample.axis = 1L,
|
|||
mcrossprod(rowMeans(X_s - c(mu_s), dims = length(modes)), mode = k)
|
||||
}, slices, slice.means, slice.sizes)) / nr.slices
|
||||
}, modes)
|
||||
Gammas <- Map(`[[`, Map(La.svd, Sigmas, nu = d, nv = 0), list("u"))
|
||||
Gammas <- Map(`[[`, Map(La.svd, Sigmas, nu = reduction.dims, nv = 0), list("u"))
|
||||
|
||||
# setup projections as `Proj_k = Gamma_k Gamma_k'`
|
||||
projections <- Map(tcrossprod, Gammas)
|
||||
|
@ -70,6 +73,12 @@ TSIR <- function(X, y, d, sample.axis = 1L,
|
|||
n_s * sum((X_s - mlm(X_s, projections))^2)
|
||||
}, slice.means, slice.sizes))
|
||||
|
||||
# invoke the logger
|
||||
if (is.function(logger)) do.call(logger, list(
|
||||
iter = 0L, Gammas = Gammas, Sigmas = Sigmas, loss = loss.last
|
||||
))
|
||||
|
||||
|
||||
# Iterate till convergence
|
||||
for (iter in seq_len(max.iter)) {
|
||||
# For each mode assume mode reduction `Gamma`s fixed and update
|
||||
|
@ -86,7 +95,7 @@ TSIR <- function(X, y, d, sample.axis = 1L,
|
|||
}, slice.proj, slice.sizes))
|
||||
|
||||
# Recompute mode `k` basis `Gamma_k`
|
||||
Gammas[[k]] <- La.svd(Sigmas[[k]], nu = d[k], nv = 0)$u
|
||||
Gammas[[k]] <- La.svd(Sigmas[[k]], nu = reduction.dims[k], nv = 0)$u
|
||||
|
||||
# update mode `k` projection matrix onto the new span of `Gamma_k`
|
||||
projections[[k]] <- tcrossprod(Gammas[[k]])
|
||||
|
@ -97,6 +106,11 @@ TSIR <- function(X, y, d, sample.axis = 1L,
|
|||
}, slice.means, slice.sizes))
|
||||
}
|
||||
|
||||
# invoke the logger
|
||||
if (is.function(logger)) do.call(logger, list(
|
||||
iter = iter, Gammas = Gammas, Sigmas = Sigmas, loss = loss
|
||||
))
|
||||
|
||||
# check break condition
|
||||
if (abs(loss - loss.last) < eps * loss) {
|
||||
break
|
||||
|
@ -107,6 +121,18 @@ TSIR <- function(X, y, d, sample.axis = 1L,
|
|||
# mode sample covariance matrices
|
||||
Omegas <- Map(function(k) n^-1 * mcrossprod(X, mode = k), modes)
|
||||
|
||||
# Check condition of sample covariances and perform regularization if needed
|
||||
if (is.finite(cond.threshold)) {
|
||||
for (j in seq_along(Omegas)) {
|
||||
# Compute min and max eigen values
|
||||
min_max <- range(eigen(Omegas[[j]], TRUE, TRUE)$values)
|
||||
# The condition is approximately `kappa(Omegas[[j]]) > cond.threshold`
|
||||
if (min_max[2] > cond.threshold * min_max[1]) {
|
||||
Omegas[[j]] <- Omegas[[j]] + diag(0.2 * min_max[2], nrow(Omegas[[j]]))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# reductions matrices `Omega_k^-1 Gamma_k` where there (reverse) kronecker
|
||||
# product spans the central tensor subspace (CTS) estimate
|
||||
structure(Map(solve, Omegas, Gammas), mcov = Omegas, Gammas = Gammas)
|
||||
|
|
|
@ -47,7 +47,7 @@ gmlm_tensor_normal <- function(X, F, sample.axis = length(dim(X)),
|
|||
Omegas <- Map(diag, dimX)
|
||||
|
||||
# Per mode covariance directions
|
||||
# Note: (the directions are transposed!)
|
||||
# Note: directions are transposed!
|
||||
dirsX <- Map(function(Sigma) {
|
||||
SVD <- La.svd(Sigma, nu = 0)
|
||||
sqrt(SVD$d) * SVD$vt
|
||||
|
@ -85,8 +85,8 @@ gmlm_tensor_normal <- function(X, F, sample.axis = length(dim(X)),
|
|||
### Phase 2: (Block) Coordinate Descent
|
||||
for (iter in seq_len(max.iter)) {
|
||||
|
||||
# update every beta (in random order)
|
||||
for (j in sample.int(length(betas))) {
|
||||
# update every beta
|
||||
for (j in seq_along(betas)) {
|
||||
FxB_j <- mlm(F, betas[-j], modes[-j])
|
||||
FxSB_j <- mlm(FxB_j, Sigmas[-j], modes[-j])
|
||||
betas[[j]] <- Omegas[[j]] %*% t(solve(mcrossprod(FxSB_j, FxB_j, j), mcrossprod(FxB_j, X, j)))
|
||||
|
|
Loading…
Reference in New Issue