77 lines
		
	
	
		
			2.7 KiB
		
	
	
	
		
			R
		
	
	
	
	
	
			
		
		
	
	
			77 lines
		
	
	
		
			2.7 KiB
		
	
	
	
		
			R
		
	
	
	
	
	
#' Compute eigenvalue problem equiv. to method.
 | 
						|
#'
 | 
						|
#' Computes the matrices \eqn{A, B} of a generalized eigenvalue problem
 | 
						|
#' \deqn{A X = B Lambda} with \eqn{X} the matrix of eigenvectors and
 | 
						|
#' \eqn{Lambda} diagonal matrix of eigenvalues.
 | 
						|
#'
 | 
						|
#' @param X predictor matrix.
 | 
						|
#' @param y responses (see details).
 | 
						|
#' @param method One of "pca", "pfc", ... TODO: finish, add more, ...
 | 
						|
#'
 | 
						|
#' @returns list of matrices \code{lhs} for \eqn{A} and \code{rhs} for \eqn{B}.
 | 
						|
#'
 | 
						|
#' @seealso section 2.1 of "Coordinate-Independent Sparse Sufficient Dimension
 | 
						|
#' Reduction and Variable Selection" By Xin Chen, Changliang Zou and
 | 
						|
#' R. Dennis Cook.
 | 
						|
#'
 | 
						|
GEP <- function(X, y, method = c('pfc', 'pca', 'sir', 'lda'), ...,
 | 
						|
    nr.slices = 10, ensamble = list(abs, identity, function(x) x^2)
 | 
						|
) {
 | 
						|
    method <- match.arg(method)
 | 
						|
 | 
						|
    if (method == 'pca') {
 | 
						|
        lhs <- cov(X)           # covariance
 | 
						|
        rhs <- diag(ncol(X))    # identity
 | 
						|
    } else if (method == 'pfc') {
 | 
						|
        X <- scale(X, scale = FALSE, center = TRUE)
 | 
						|
 | 
						|
        Fy <- sapply(ensamble, do.call, list(y))
 | 
						|
        Fy <- scale(Fy, scale = FALSE, center = TRUE)
 | 
						|
 | 
						|
        # Compute Sigma_fit (the sample covariance matrix of the fitted vectors).
 | 
						|
        P_Fy <- Fy %*% solve(crossprod(Fy), t(Fy))
 | 
						|
        lhs <- crossprod(X, P_Fy %*% X) / nrow(X)
 | 
						|
        # Estimate Sigma (the MLE sample covariance matrix).
 | 
						|
        rhs <- crossprod(X) / nrow(X)
 | 
						|
    } else if (method == 'sir') {
 | 
						|
        if (NCOL(y) != 1) {
 | 
						|
            stop('For SIR only univariate response suported.')
 | 
						|
        }
 | 
						|
        # Build slices (if not categorical)
 | 
						|
        if (is.factor(y)) {
 | 
						|
            slice.index <- y
 | 
						|
        } else {
 | 
						|
            # TODO: make this proper, just for a bit of playing!!!
 | 
						|
            slice.size <- round(length(y) / nr.slices)
 | 
						|
            slice.index <- factor((rank(y) - 1) %/% slice.size)
 | 
						|
        }
 | 
						|
 | 
						|
        # Covariance of slice means, Cov(E[X - E X | y])
 | 
						|
        lhs <- cov(t(sapply(levels(slice.index), function(i) {
 | 
						|
            colMeans(X[slice.index == i, , drop = FALSE])
 | 
						|
        })))
 | 
						|
        # Sample covariance
 | 
						|
        rhs <- cov(X)
 | 
						|
    } else if (method == 'lda') {
 | 
						|
        # TODO: check this properly!!! (Maybe a bit better implementation and/or
 | 
						|
        # some theoretical inaccuracies)
 | 
						|
 | 
						|
        y <- as.factor(y)
 | 
						|
 | 
						|
        # group means
 | 
						|
        mu <- as.matrix(aggregate(X, list(y), mean)[, -1])
 | 
						|
 | 
						|
        # between group covariance Sigma.B = Cov(E[X | y])
 | 
						|
        lhs <- ((nrow(mu) - 1) / nrow(mu)) * cov(mu)
 | 
						|
 | 
						|
        # within group covariance (Sigma.T = Sigma.B + Sigma.W)
 | 
						|
        # with Sigma.W = E(Cov(X | y)) and Sigma.T = Cov(X)
 | 
						|
        rhs <- (((nrow(X) - 1) / nrow(X)) * cov(X)) - lhs
 | 
						|
    } else {
 | 
						|
        stop('Not implemented!')
 | 
						|
    }
 | 
						|
 | 
						|
    # Return left- and right-hand-side of GEP equation system.
 | 
						|
    list(lhs = lhs, rhs = rhs)
 | 
						|
}
 |