#' Tensor Sliced Inverse Regression
#'
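#' @param X array of predictors where one axis, selected by `sample.axis`,
#'   enumerates the observations
#' @param y response vector; a factor or integer vector is used as is, a
#'   numeric response is sliced into `nr.slices` slices
#' @param d integer vector of reduction dimensions, one per predictor mode
#' @param sample.axis axis of `X` enumerating the observations
#' @param nr.slices number of slices for a numeric response, ignored if `y`
#'   is a factor or integer
#' @param max.iter maximum number of iterations, each cycling over all modes
#' @param eps tolerance for the relative change of the loss used as the
#'   convergence criterion
#' @param slice.method slicing of a numeric response, either by equal-width
#'   intervals of the response values ("cut") or of its empirical distribution
#'   function ("ecdf"); ignored if `y` is a factor or integer
#'
#' @return list of per-mode reduction matrices `Omega_k^-1 Gamma_k` with the
#'   mode covariance matrices `Omega_k` and the bases `Gamma_k` attached as
#'   attributes `mcov` and `Gammas`
#'
#' @examples
#' # illustrative call on simulated data (no structure in `y`); assumes the
#' # package's tensor helpers `mlm()` and `mcrossprod()` are available
#' n <- 100
#' p <- c(4, 6)
#' X <- array(rnorm(n * prod(p)), dim = c(n, p))
#' y <- rnorm(n)
#' B <- TSIR(X, y, d = c(1L, 2L), sample.axis = 1L)
#'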
#' @export
TSIR <- function(X, y, d, sample.axis = 1L,
    nr.slices = 10L,                    # number of slices, ignored if `y` is a factor or integer
    max.iter = 50L,
    eps = sqrt(.Machine$double.eps),
    slice.method = c("cut", "ecdf")     # ignored if `y` is a factor or integer
) {
    if (!(is.factor(y) || is.integer(y))) {
        slice.method <- match.arg(slice.method)
        if (slice.method == "ecdf") {
            # slicing on the empirical distribution function gives
            # (approximately) equally sized slices
            y <- cut(ecdf(y)(y), nr.slices)
        } else {
            y <- cut(y, nr.slices)
            # ensure there are no empty slices
            if (any(table(y) == 0)) {
                y <- as.factor(as.integer(y))
            }
        }
    }

    stopifnot(exprs = {
        dim(X)[sample.axis] == length(y)
        length(d) + 1L == length(dim(X))
    })

    # rearrange `X` such that the last axis enumerates observations
    axis.perm <- c(seq_along(dim(X))[-sample.axis], sample.axis)
    X <- aperm(X, axis.perm)

    # get dimensions
    n <- tail(dim(X), 1)
    p <- head(dim(X), -1)
    modes <- seq_along(p)

    # reinterpret `y` as a factor; its levels define the slices and its
    # integer codes give the slice index of each observation
    y <- as.factor(y)
    nr.slices <- length(levels(y))
    slice.sizes <- table(y)
    y <- as.integer(y)

    # center `X`
    X <- X - c(rowMeans(X, dims = length(modes)))

    # Slice `X` into slices governed by `y`
    slices <- Map(function(s) {
        X_s <- X[rep(s == y, each = prod(p))]
        dim(X_s) <- c(p, slice.sizes[s])
        X_s
    }, seq_len(nr.slices))
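    # (each `slices[[s]]` is an array of dimension `c(p, slice.sizes[s])`
    #  holding the observations falling into slice `s`)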

    # For each slice we get the slice means
    slice.means <- Map(rowMeans, slices, MoreArgs = list(dims = length(modes)))

    # Initial `Gamma_k` estimates as dominant eigenvectors of `Cov_c(E[X_(k) | Y])`,
    # the weighted between-slice covariance of the mode `k` unfolded slice means
    Sigmas <- Map(function(k) {
        n^-1 * Reduce(`+`, Map(function(mu_s, n_s) {
            n_s * mcrossprod(mu_s, mode = k)
        }, slice.means, slice.sizes))
    }, modes)
    Gammas <- Map(`[[`, Map(La.svd, Sigmas, nu = d, nv = 0), list("u"))

    # setup projections as `Proj_k = Gamma_k Gamma_k'`
    projections <- Map(tcrossprod, Gammas)

    # compute initial loss for the break condition
    loss.last <- n^-1 * Reduce(`+`, Map(function(X_s, n_s) {
        n_s * sum((X_s - mlm(X_s, projections))^2)
    }, slice.means, slice.sizes))
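    # (the loss is the weighted mean squared distance of the slice means from
    #  their projections onto the current subspaces,
    #  `loss = n^-1 sum_s n_s || mu_s - mu_s x_1 P_1 ... x_r P_r ||_F^2`)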

    # Iterate until convergence
    for (iter in seq_len(max.iter)) {
        # For each mode `k`, hold the other mode reductions `Gamma_j`, `j != k`,
        # fixed and update the current mode reduction `Gamma_k`
        for (k in modes) {
            # Project (reduce) the slice means along all modes other than `k`
            slice.proj <- Map(mlm, slice.means, MoreArgs = list(
                Gammas[-k], modes[-k], transposed = TRUE
            ))
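            # (`slice.proj[[s]]` keeps the original dimension `p[k]` in mode `k`
            #  and has the reduced dimensions `d[-k]` in all other modes)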

            # Update mode `k` slice mean covariances
            Sigmas[[k]] <- n^-1 * Reduce(`+`, Map(function(PX_s, n_s) {
                n_s * mcrossprod(PX_s, mode = k)
            }, slice.proj, slice.sizes))

            # Recompute mode `k` basis `Gamma_k`
            Gammas[[k]] <- La.svd(Sigmas[[k]], nu = d[k], nv = 0)$u

            # update mode `k` projection matrix onto the new span of `Gamma_k`
            projections[[k]] <- tcrossprod(Gammas[[k]])

            # compute loss for the break condition
            loss <- n^-1 * Reduce(`+`, Map(function(X_s, n_s) {
                n_s * sum((X_s - mlm(X_s, projections))^2)
            }, slice.means, slice.sizes))
        }

        # break if the relative change in the loss drops below `eps`
        if (abs(loss - loss.last) < eps * loss) {
            break
        }
        loss.last <- loss
    }

    # mode sample covariance matrices
    Omegas <- Map(function(k) n^-1 * mcrossprod(X, mode = k), modes)

    # reduction matrices `Omega_k^-1 Gamma_k` whose (reverse) Kronecker product
    # spans the central tensor subspace (CTS) estimate
    structure(Map(solve, Omegas, Gammas), mcov = Omegas, Gammas = Gammas)
}