tensor_predictors/tensorPredictors/R/GEP.R

77 lines
2.7 KiB
R

#' Compute eigenvalue problem equiv. to method.
#'
#' Computes the matrices \eqn{A, B} of a generalized eigenvalue problem
#' \deqn{A X = B Lambda} with \eqn{X} the matrix of eigenvectors and
#' \eqn{Lambda} diagonal matrix of eigenvalues.
#'
#' @param X predictor matrix.
#' @param y responses (see details).
#' @param method One of "pca", "pfc", ... TODO: finish, add more, ...
#'
#' @returns list of matrices \code{lhs} for \eqn{A} and \code{rhs} for \eqn{B}.
#'
#' @seealso section 2.1 of "Coordinate-Independent Sparse Sufficient Dimension
#' Reduction and Variable Selection" By Xin Chen, Changliang Zou and
#' R. Dennis Cook.
#'
GEP <- function(X, y, method = c('pfc', 'pca', 'sir', 'lda'), ...,
nr.slices = 10, ensamble = list(abs, identity, function(x) x^2)
) {
method <- match.arg(method)
if (method == 'pca') {
lhs <- cov(X) # covariance
rhs <- diag(ncol(X)) # identity
} else if (method == 'pfc') {
X <- scale(X, scale = FALSE, center = TRUE)
Fy <- sapply(ensamble, do.call, list(y))
Fy <- scale(Fy, scale = FALSE, center = TRUE)
# Compute Sigma_fit (the sample covariance matrix of the fitted vectors).
P_Fy <- Fy %*% solve(crossprod(Fy), t(Fy))
lhs <- crossprod(X, P_Fy %*% X) / nrow(X)
# Estimate Sigma (the MLE sample covariance matrix).
rhs <- crossprod(X) / nrow(X)
} else if (method == 'sir') {
if (NCOL(y) != 1) {
stop('For SIR only univariate response suported.')
}
# Build slices (if not categorical)
if (is.factor(y)) {
slice.index <- y
} else {
# TODO: make this proper, just for a bit of playing!!!
slice.size <- round(length(y) / nr.slices)
slice.index <- factor((rank(y) - 1) %/% slice.size)
}
# Covariance of slice means, Cov(E[X - E X | y])
lhs <- cov(t(sapply(levels(slice.index), function(i) {
colMeans(X[slice.index == i, , drop = FALSE])
})))
# Sample covariance
rhs <- cov(X)
} else if (method == 'lda') {
# TODO: check this properly!!! (Maybe a bit better implementation and/or
# some theoretical inaccuracies)
y <- as.factor(y)
# group means
mu <- as.matrix(aggregate(X, list(y), mean)[, -1])
# between group covariance Sigma.B = Cov(E[X | y])
lhs <- ((nrow(mu) - 1) / nrow(mu)) * cov(mu)
# within group covariance (Sigma.T = Sigma.B + Sigma.W)
# with Sigma.W = E(Cov(X | y)) and Sigma.T = Cov(X)
rhs <- (((nrow(X) - 1) / nrow(X)) * cov(X)) - lhs
} else {
stop('Not implemented!')
}
# Return left- and right-hand-side of GEP equation system.
list(lhs = lhs, rhs = rhs)
}