tensor_predictors/tensorPredictors/R/GEP.R

#' Compute eigenvalue problem equiv. to method.
#'
#' Computes the matrices \eqn{A, B} of a generalized eigenvalue problem
#' \deqn{A X = B Lambda} with \eqn{X} the matrix of eigenvectors and
#' \eqn{Lambda} diagonal matrix of eigenvalues.
#'
#' @param X predictor matrix.
#' @param y responses (see details).
#' @param method One of "pca", "pfc", ... TODO: finish, add more, ...
#'
#' @returns list of matrices \code{lhs} for \eqn{A} and \code{rhs} for \eqn{B}.
#'
#' @seealso section 2.1 of "Coordinate-Independent Sparse Sufficient Dimension
#' Reduction and Variable Selection" By Xin Chen, Changliang Zou and
#' R. Dennis Cook.
#'
GEP <- function(X, y, method = c('pfc', 'pca', 'sir', 'lda'), ...,
    nr.slices = 10, ensamble = list(abs, identity, function(x) x^2)
) {
    method <- match.arg(method)

    if (method == 'pca') {
        lhs <- cov(X)           # covariance
        rhs <- diag(ncol(X))    # identity
    } else if (method == 'pfc') {
        X <- scale(X, scale = FALSE, center = TRUE)

        Fy <- sapply(ensamble, do.call, list(y))
        Fy <- scale(Fy, scale = FALSE, center = TRUE)

        # Compute Sigma_fit (the sample covariance matrix of the fitted vectors).
        P_Fy <- Fy %*% solve(crossprod(Fy), t(Fy))
        lhs <- crossprod(X, P_Fy %*% X) / nrow(X)
        # Estimate Sigma (the MLE sample covariance matrix).
        rhs <- crossprod(X) / nrow(X)
    } else if (method == 'sir') {
        if (NCOL(y) != 1) {
            stop('For SIR only univariate response suported.')
        }
        # Build slices (if not categorical)
        if (is.factor(y)) {
            slice.index <- y
        } else {
            # TODO: make this proper, just for a bit of playing!!!
            slice.size <- round(length(y) / nr.slices)
            slice.index <- factor((rank(y) - 1) %/% slice.size)
        }

        # Covariance of slice means, Cov(E[X - E X | y])
        lhs <- cov(t(sapply(levels(slice.index), function(i) {
            colMeans(X[slice.index == i, , drop = FALSE])
        })))
        # Sample covariance
        rhs <- cov(X)
    } else if (method == 'lda') {
        # TODO: check this properly!!! (Maybe a bit better implementation and/or
        # some theoretical inaccuracies)

        y <- as.factor(y)

        # group means
        mu <- as.matrix(aggregate(X, list(y), mean)[, -1])

        # between group covariance Sigma.B = Cov(E[X | y])
        lhs <- ((nrow(mu) - 1) / nrow(mu)) * cov(mu)

        # within group covariance (Sigma.T = Sigma.B + Sigma.W)
        # with Sigma.W = E(Cov(X | y)) and Sigma.T = Cov(X)
        rhs <- (((nrow(X) - 1) / nrow(X)) * cov(X)) - lhs
    } else {
        stop('Not implemented!')
    }

    # Return left- and right-hand-side of GEP equation system.
    list(lhs = lhs, rhs = rhs)
}