tensor_predictors/tensorPredictors/R/LSIR.R

#' Longitudinal Sliced Inverse Regression
#'
#' @param X matrix of dim \eqn{n \times p t} with each row representing a
#'  vectorized \eqn{p \times t} observation.
#' @param y vector of \eqn{n} elements as factors. (can be coersed to factors)
#'
#' TODO: finish
#'
#' @export
LSIR <- function(X, y,
    reduction.dims = rep(1L, length(dim(X)) - 1L),
    sample.axis = 1L,
    nr.slices = 10L,    # default slices, ignored if y is a factor or integer
    eps = sqrt(.Machine$double.eps),
    slice.method = c("cut", "ecdf")   # ignored if y is a factor or integer
) {
    # In case of `y` not descrete, group `y` into slices
    if (!(is.factor(y) || is.integer(y))) {
        slice.method <- match.arg(slice.method)
        if (slice.method == "ecdf") {
            y <- cut(ecdf(y)(y), nr.slices)
        } else {
            y <- cut(y, nr.slices)
            # ensure there are no empty slices
            if (any(table(y) == 0)) {
                y <- as.factor(as.integer(y))
            }
        }
    }
    if (!is.factor(y)) {
        y <- factor(y)
    }

    stopifnot(dim(X)[sample.axis] == length(y))

    # rearrange `X` such that the first axis enumerates observations
    axis.perm <- c(sample.axis, seq_along(dim(X))[-sample.axis])
    X <- aperm(X, axis.perm)

    modes <- seq_along(dim(X))[-1L]
    n <- dim(X)[1L]

    Sigmas <- lapply(seq_along(modes), function(i) {
        matrix(rowMeans(apply(X, modes[-i], cov)), dim(X)[modes[i]])
    })
    # Omega_i = Sigma_i^{-1 / 2}
    isqrt_Sigmas <- Map(matpow, Sigmas, -1 / 2)

    # Normalize observations
    Z <- mlm(X - rep(colMeans(X), each = dim(X)[1L]), isqrt_Sigmas, modes = modes)

    # Estimate conditional covariances Omega = Cov(E[Z | Y])
    slice.args <- c(
        list(Z), rep(alist(, )[1], length(dim(X))), list(drop = FALSE)
    )
    Omegas <- lapply(seq_along(modes), function(i) {
        matrix(Reduce(`+`, lapply(levels(y), function(l) {
            slice.args[[2]] <- y == l
            rowMeans(apply(do.call(`[`, slice.args), modes[-i], function(z) {
                (nrow(z) / n) * tcrossprod(colMeans(z))
            }))
        })), dim(X)[modes[i]])
    })

    # Compute central subspace basis estimate
    betas <- mapply(function(isqrt_sigma, omega, reduction_dim) {
        isqrt_sigma %*% La.svd(omega, reduction_dim, 0L)$u
    }, isqrt_Sigmas, Omegas, reduction.dims, SIMPLIFY = FALSE)

    list(betas = betas, Sigmas = Sigmas, Omegas = Omegas)
}