2021-11-12 17:22:45 +00:00
|
|
|
#' Compute eigenvalue problem equiv. to method.
|
|
|
|
#'
|
|
|
|
#' Computes the matrices \eqn{A, B} of a generalized eigenvalue problem
|
|
|
|
#' \deqn{A X = B Lambda} with \eqn{X} the matrix of eigenvectors and
|
|
|
|
#' \eqn{Lambda} diagonal matrix of eigenvalues.
|
|
|
|
#'
|
|
|
|
#' @param X predictor matrix.
|
|
|
|
#' @param y responses (see details).
|
|
|
|
#' @param method One of "pca", "pfc", ... TODO: finish, add more, ...
|
|
|
|
#'
|
|
|
|
#' @returns list of matrices \code{lhs} for \eqn{A} and \code{rhs} for \eqn{B}.
|
|
|
|
#'
|
|
|
|
#' @seealso section 2.1 of "Coordinate-Independent Sparse Sufficient Dimension
|
|
|
|
#' Reduction and Variable Selection" By Xin Chen, Changliang Zou and
|
|
|
|
#' R. Dennis Cook.
|
|
|
|
#'
|
2021-11-16 11:01:12 +00:00
|
|
|
GEP <- function(X, y, method = c('pfc', 'pca', 'sir', 'lda'), ...,
|
2021-11-12 17:22:45 +00:00
|
|
|
nr.slices = 10, ensamble = list(abs, identity, function(x) x^2)
|
|
|
|
) {
|
|
|
|
method <- match.arg(method)
|
|
|
|
|
|
|
|
if (method == 'pca') {
|
|
|
|
lhs <- cov(X) # covariance
|
|
|
|
rhs <- diag(ncol(X)) # identity
|
|
|
|
} else if (method == 'pfc') {
|
|
|
|
X <- scale(X, scale = FALSE, center = TRUE)
|
|
|
|
|
|
|
|
Fy <- sapply(ensamble, do.call, list(y))
|
|
|
|
Fy <- scale(Fy, scale = FALSE, center = TRUE)
|
|
|
|
|
|
|
|
# Compute Sigma_fit (the sample covariance matrix of the fitted vectors).
|
|
|
|
P_Fy <- Fy %*% solve(crossprod(Fy), t(Fy))
|
|
|
|
lhs <- crossprod(X, P_Fy %*% X) / nrow(X)
|
|
|
|
# Estimate Sigma (the MLE sample covariance matrix).
|
|
|
|
rhs <- crossprod(X) / nrow(X)
|
|
|
|
} else if (method == 'sir') {
|
|
|
|
if (NCOL(y) != 1) {
|
|
|
|
stop('For SIR only univariate response suported.')
|
|
|
|
}
|
|
|
|
# Build slices (if not categorical)
|
|
|
|
if (is.factor(y)) {
|
|
|
|
slice.index <- y
|
|
|
|
} else {
|
|
|
|
# TODO: make this proper, just for a bit of playing!!!
|
|
|
|
slice.size <- round(length(y) / nr.slices)
|
|
|
|
slice.index <- factor((rank(y) - 1) %/% slice.size)
|
|
|
|
}
|
|
|
|
|
|
|
|
# Covariance of slice means, Cov(E[X - E X | y])
|
|
|
|
lhs <- cov(t(sapply(levels(slice.index), function(i) {
|
|
|
|
colMeans(X[slice.index == i, , drop = FALSE])
|
|
|
|
})))
|
|
|
|
# Sample covariance
|
|
|
|
rhs <- cov(X)
|
2021-11-16 11:01:12 +00:00
|
|
|
} else if (method == 'lda') {
|
|
|
|
# TODO: check this properly!!! (Maybe a bit better implementation and/or
|
|
|
|
# some theoretical inaccuracies)
|
|
|
|
|
|
|
|
y <- as.factor(y)
|
|
|
|
|
|
|
|
# group means
|
|
|
|
mu <- as.matrix(aggregate(X, list(y), mean)[, -1])
|
|
|
|
|
|
|
|
# between group covariance Sigma.B = Cov(E[X | y])
|
|
|
|
lhs <- ((nrow(mu) - 1) / nrow(mu)) * cov(mu)
|
|
|
|
|
|
|
|
# within group covariance (Sigma.T = Sigma.B + Sigma.W)
|
|
|
|
# with Sigma.W = E(Cov(X | y)) and Sigma.T = Cov(X)
|
|
|
|
rhs <- (((nrow(X) - 1) / nrow(X)) * cov(X)) - lhs
|
2021-11-12 17:22:45 +00:00
|
|
|
} else {
|
|
|
|
stop('Not implemented!')
|
|
|
|
}
|
|
|
|
|
|
|
|
# Return left- and right-hand-side of GEP equation system.
|
|
|
|
list(lhs = lhs, rhs = rhs)
|
|
|
|
}
|