#' Tensor Sliced Inverse Regression
#'
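#' Tensor generalization of Sliced Inverse Regression (SIR): the response is
#' sliced and the mode-wise reduction bases `Gamma_k` are estimated by
#' alternating updates until the projection residual of the slice means
#' converges.
#'
#' @param X multi-dimensional array of predictors with one axis enumerating
#'  the observations (see `sample.axis`)
#' @param y response vector; a factor or integer is used as slices directly,
#'  a numeric response is sliced into (up to) `nr.slices` slices
#' @param d integer vector of reduction dimensions, one per predictor mode,
#'  that is `length(d) + 1 == length(dim(X))`
#' @param sample.axis index of the observation axis of `X`
#' @param nr.slices number of slices for a numeric response, ignored if `y`
#'  is a factor or integer
#' @param max.iter maximum number of update iterations
#' @param eps relative tolerance used in the convergence check
#' @param slice.method slicing method for a numeric response, either `"cut"`
#'  applied to the response values or `"ecdf"` applied to their empirical
#'  distribution function values; ignored if `y` is a factor or integer
#'
#' @return list of mode-wise reduction matrices `Omega_k^-1 Gamma_k`
#'
#' @examples
#' # minimal usage sketch with simulated data (sizes chosen for illustration)
#' n <- 100
#' p <- c(3, 4)
#' X <- array(rnorm(n * prod(p)), dim = c(n, p))
#' y <- X[, 1, 1] + rnorm(n, sd = 0.5)
#' # reduce both non-sample modes to one dimension each
#' B <- TSIR(X, y, d = c(1L, 1L), sample.axis = 1L)
#'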
#' @export
TSIR <- function(X, y, d, sample.axis = 1L,
nr.slices = 10L, # default slices, ignored if y is a factor or integer
max.iter = 50L,
eps = sqrt(.Machine$double.eps),
slice.method = c("cut", "ecdf") # ignored if y is a factor or integer
) {
if (!(is.factor(y) || is.integer(y))) {
slice.method <- match.arg(slice.method)
if (slice.method == "ecdf") {
y <- cut(ecdf(y)(y), nr.slices)
} else {
y <- cut(y, nr.slices)
# ensure there are no empty slices
if (any(table(y) == 0)) {
y <- as.factor(as.integer(y))
}
}
}
stopifnot(exprs = {
dim(X)[sample.axis] == length(y)
length(d) + 1L == length(dim(X))
})
    # rearrange `X` such that the last axis enumerates observations
axis.perm <- c(seq_along(dim(X))[-sample.axis], sample.axis)
X <- aperm(X, axis.perm)
# get dimensions
n <- tail(dim(X), 1)
p <- head(dim(X), -1)
modes <- seq_along(p)
    # reinterpret `y` as a factor; its number of levels is the number of slices
y <- as.factor(y)
nr.slices <- length(levels(y))
slice.sizes <- table(y)
y <- as.integer(y)
# center `X`
    X <- X - c(rowMeans(X, dims = length(modes)))
# Slice `X` into slices governed by `y`
slices <- Map(function(s) {
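        # select the entries of all observations in slice `s` from the
        # flattened array (each observation occupies `prod(p)` consecutive
        # elements), then restore the tensor dimensions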
X_s <- X[rep(s == y, each = prod(p))]
dim(X_s) <- c(p, slice.sizes[s])
X_s
}, seq_len(nr.slices))
    # compute the mean of each slice
    slice.means <- Map(rowMeans, slices, MoreArgs = list(dims = length(modes)))
    # Initial `Gamma_k` estimates as dominant eigenvectors of `Cov_c(E[X_(k) | Y])`
    Sigmas <- Map(function(k) {
        Reduce(`+`, Map(function(mu_s, n_s) {
            n_s * mcrossprod(mu_s, mode = k)
        }, slice.means, slice.sizes)) / n
    }, modes)
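    # the first `d[k]` left singular vectors of each `Sigmas[[k]]` form the
    # initial basis `Gamma_k`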
Gammas <- Map(`[[`, Map(La.svd, Sigmas, nu = d, nv = 0), list("u"))
    # set up projections as `Proj_k = Gamma_k Gamma_k'`
projections <- Map(tcrossprod, Gammas)
# compute initial loss for the break condition
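    # (the loss is `n^-1 sum_s n_s || mu_s - mlm(mu_s, projections) ||_F^2`,
    # the squared projection residual of the slice means)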
loss.last <- n^-1 * Reduce(`+`, Map(function(X_s, n_s) {
n_s * sum((X_s - mlm(X_s, projections))^2)
}, slice.means, slice.sizes))
# Iterate till convergence
for (iter in seq_len(max.iter)) {
        # For each mode `k`, hold the other modes' reductions fixed and update
        # the current mode reduction `Gamma_k`
for (k in modes) {
# Other mode projection of slice means
slice.proj <- Map(mlm, slice.means, MoreArgs = list(
Gammas[-k], modes[-k], transposed = TRUE
))
# Update mode `k` slice mean covariances
Sigmas[[k]] <- n^-1 * Reduce(`+`, Map(function(PX_s, n_s) {
n_s * mcrossprod(PX_s, mode = k)
}, slice.proj, slice.sizes))
# Recompute mode `k` basis `Gamma_k`
Gammas[[k]] <- La.svd(Sigmas[[k]], nu = d[k], nv = 0)$u
# update mode `k` projection matrix onto the new span of `Gamma_k`
projections[[k]] <- tcrossprod(Gammas[[k]])
# compute loss for break condition
loss <- n^-1 * Reduce(`+`, Map(function(X_s, n_s) {
n_s * sum((X_s - mlm(X_s, projections))^2)
}, slice.means, slice.sizes))
}
        # break if the relative change in the loss drops below `eps`
if (abs(loss - loss.last) < eps * loss) {
break
}
loss.last <- loss
}
# mode sample covariance matrices
Omegas <- Map(function(k) n^-1 * mcrossprod(X, mode = k), modes)
    # reduction matrices `Omega_k^-1 Gamma_k` whose (reverse) Kronecker
    # product spans the central tensor subspace (CTS) estimate
Map(solve, Omegas, Gammas)
}