#' Coordinate-Independent Sparse Estimation.
#'
#' Solves a penalized version of a GEP (Generalized Eigenvalue Problem)
#' \deqn{M V = N V \Lambda}
#' with \eqn{\Lambda} a diagonal matrix carrying the eigenvalues and \eqn{V}
#' the matrix of the first \eqn{d} eigenvectors.
#'
#' TODO: DOES NOT WORK, DON'T KNOW WHY (contact first author for the Matlab code)
#'
#' @param M the GEP's left hand side
#' @param N the GEP's right hand side
#' @param d number of leading eigenvalue/eigenvector pairs to be computed
#' @param method estimation method; only \code{"PFC"} is intended and the
#'  argument is currently unused
#' @param max.iter maximum number of iterations for the iterative optimization
#' @param Theta penalty parameter candidates. If a vector (or single number) is
#'  provided, its values are used as the penalty parameter candidates. If not
#'  provided, a reasonable grid of parameters is computed.
#' @param tol.norm numerical tolerance below which a row norm of \eqn{V} is
#'  treated as zero and the corresponding variable is dropped
#' @param tol.break tolerance of the break condition, i.e. the subspace
#'  distance between consecutive iterates
#' @param r exponent of the adaptive penalty weights; each row is weighted by
#'  its initial norm to the power \code{-r}
#'
#' @returns a list of semi-orthogonal matrices, one per penalty candidate in
#'  \code{Theta}, each carrying the attributes \code{theta}, \code{iter},
#'  \code{BIC}, \code{df} and \code{dist}
#'
#' @examples \dontrun{
#' # Studies 1-4 from the CISE paper (requires the `mvtnorm` package)
#' dataset <- function(name, n = 60, p = 24) {
#'     name <- toupper(name)
#'     if (!startsWith(name, 'M')) { name <- paste0('M', name) }
#'
#'     if (name %in% c('M1', 'M2', 'M3')) {
#'         Sigma <- 0.5^abs(outer(1:p, 1:p, `-`))
#'         X <- mvtnorm::rmvnorm(n, sigma = Sigma)
#'         y <- switch(name,
#'             M1 = rowSums(X[, 1:3]) + rnorm(n, 0, 0.5),
#'             M2 = rowSums(X[, 1:3]) + rnorm(n, 0, 2),
#'             M3 = X[, 1] / (0.5 + (X[, 2] + 1.5)^2) + rnorm(n, 0, 0.2)
#'         )
#'         B <- switch(name,
#'             M1 = as.matrix(as.double(1:p < 4)),
#'             M2 = as.matrix(as.double(1:p < 4)),
#'             M3 = diag(1, p, 2)
#'         )
#'     } else if (name == 'M4') {
#'         y <- rnorm(n)
#'         Delta <- 0.5^abs(outer(1:p, 1:p, `-`))
#'         Gamma <- 0.5 * cbind(
#'             (1:p <= 4),
#'             (-(1:p <= 4))^(1:p + 1)
#'         )
#'         B <- qr.Q(qr(solve(Delta, Gamma)))
#'         X <- cbind(y, y^2) %*% t(Gamma) +
#'             mvtnorm::rmvnorm(n, rep(0, p)) %*% matpow(Delta, 0.5)
#'     } else {
#'         stop('Unknown dataset name.')
#'     }
#'
#'     list(X = X, y = y, B = B)
#' }
#' # Sample dataset
#' n <- 1000
#' ds <- dataset(3, n = n)
#' # Convert to the PFC associated GEP
#' Fy <- with(ds, cbind(abs(y), y, y^2))
#' P.Fy <- Fy %*% solve(crossprod(Fy), t(Fy))
#' M <- with(ds, crossprod(X, P.Fy %*% X) / nrow(X)) # Sigma_fit
#' N <- cov(ds$X) # Sigma
#'
#' fits <- CISE(M, N, d = ncol(ds$B), Theta = log(seq(1, exp(1e-3), len = 1000)))
#'
#' BIC <- unlist(Map(attr, fits, 'BIC'))
#' df <- unlist(Map(attr, fits, 'df'))
#' dist <- unlist(Map(attr, fits, 'dist'))
#' iter <- unlist(Map(attr, fits, 'iter'))
#' theta <- unlist(Map(attr, fits, 'theta'))
#' p.theta <- unlist(Map(function(V) sum(rowSums(V^2) > 1e-9), fits))
#'
#' par(mfrow = c(2, 2))
#' plot(theta, BIC, type = 'l')
#' plot(theta, p.theta, type = 'l')
#' plot(theta, dist, type = 'l')
#' plot(theta, iter, type = 'l')
#' }
#'
#' @seealso "Coordinate-Independent Sparse Sufficient Dimension Reduction and
#'  Variable Selection" by Xin Chen, Changliang Zou and R. Dennis Cook
#'  (The Annals of Statistics, 2010).
#'
#' @note for speed reasons this function attempts to use
#'  \code{\link[RSpectra]{eigs_sym}} from the \pkg{RSpectra} package if it is
#'  installed; otherwise \code{\link{eigen}} is used, which might be
#'  significantly slower.
#'
#' @export
CISE <- function(M, N, d = 1L, method = "PFC", max.iter = 100L, Theta = NULL,
    tol.norm = 1e-6, tol.break = 1e-6, r = 0.5
) {
    isrN <- matpow(N, -0.5) # inverse square root of N, i.e. N^-1/2
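    # `matpow` is assumed to be this package's symmetric matrix power helper;
    # for reference, a minimal sketch of an equivalent computation via the
    # eigendecomposition (a hypothetical stand-in, not necessarily the
    # package's implementation):
    #     sym.matpow <- function(A, pow, tol = 1e-12) {
    #         es <- eigen(A, symmetric = TRUE)
    #         keep <- es$values > tol * max(abs(es$values)) # skip null space
    #         es$vectors[, keep] %*% (es$values[keep]^pow * t(es$vectors[, keep]))
    #     }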
    G <- isrN %*% M %*% isrN # G = N^-1/2 M N^-1/2

    # Step 1: Solve the (ordinary, unconstrained) eigenvalue problem used as
    # the initial value for the following iterative optimization
    # (solution of (2.8))
    Gamma <- if (requireNamespace("RSpectra", quietly = TRUE)) {
        RSpectra::eigs_sym(G, d)$vectors
    } else {
        eigen(G, symmetric = TRUE)$vectors[, seq_len(d), drop = FALSE]
    }
    V.init <- isrN %*% Gamma

    # Build the penalty grid if not provided
    if (is.null(Theta)) {
        # TODO: figure out what a good min to max in steps grid is
        theta.max <- sqrt(max(rowSums(Gamma^2)))
        Theta <- seq(0.01 * theta.max, 0.75 * theta.max, length.out = 10)
    }

    norms <- sqrt(rowSums(V.init^2)) # row norms of the initial V
    # Adaptive penalty weights (the factor 1/2 of the penalty derivative is
    # absorbed here)
    theta.scale <- 0.5 * ifelse(norms < tol.norm, 0, norms^(-r))

    # For each penalty candidate
    fits <- lapply(Theta, function(theta) {
        # Step 2: Iteratively optimize the constrained GEP
        V <- V.init
        dropped <- rep(FALSE, nrow(M)) # keep track of dropped variables

        for (iter in seq_len(max.iter)) {
            # Compute current row norms of V
            norms <- sqrt(rowSums(V^2))

            # Check if variables are dropped. If so, update `dropped` and
            # recompute the reduced problem matrices.
            if (any(norms < tol.norm)) {
                dropped[!dropped] <- norms < tol.norm
                norms <- norms[!(norms < tol.norm)]
                isrN <- matpow(N[!dropped, !dropped], -0.5)
                # NOTE: G must be recomputed from the reduced M and N;
                # subsetting the full G is not equivalent since, in general,
                # (N^-1/2 M N^-1/2)[red, red] != N_red^-1/2 M_red N_red^-1/2.
                G <- isrN %*% M[!dropped, !dropped] %*% isrN
            }

            # Approx. penalty term derivative at the current position
            h <- theta * (theta.scale[!dropped] / norms)

            # Updated G at the current position (scaling by 1/2 is done in
            # `theta.scale`)
            A <- G - (isrN %*% (h * isrN))

            # Solve the eigenvalue problem for the next iterate
            Gamma <- if (requireNamespace("RSpectra", quietly = TRUE)) {
                RSpectra::eigs_sym(A, d)$vectors
            } else {
                eigen(A, symmetric = TRUE)$vectors[, seq_len(d), drop = FALSE]
            }
            V.last <- V
            V <- isrN %*% Gamma

            # Check if there are enough variables left
            if (nrow(V) < d + 1) { break }

            # Break condition (only comparable when nothing was dropped)
            if (nrow(V.last) == nrow(V)
                && dist.subspace(V.last, V, normalize = TRUE) < tol.break) {
                break
            }
        }

        # Recreate dropped variables by filling their parameters with 0.
        V.full <- matrix(0, nrow(M), d)
        V.full[!dropped, ] <- V

        df <- (sum(!dropped) - d) * d
        # Fit term of the BIC-type criterion. NOTE: the criterion in the CISE
        # paper adds a `log(n) * df / n` penalty, but the sample size `n` is
        # not available from `M` and `N` alone, so it is omitted here.
        BIC <- -sum(V.full * (M %*% V.full))
        # Subspace distance between the last two iterates (NA if the final
        # iteration dropped variables, making the iterates incomparable)
        dist <- if (nrow(V.last) == nrow(V)) {
            dist.subspace(V.last, V, normalize = TRUE)
        } else {
            NA_real_
        }

        structure(qr.Q(qr(V.full)),
            theta = theta, iter = iter, BIC = BIC, df = df, dist = dist)
    })

    structure(fits, call = match.call(), class = c("tensorPredictors", "CISE"))
}
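
## Sanity check (sketch, not run): with a single penalty candidate `Theta = 0`
## the penalty term vanishes and a single `CISE` iteration should reproduce
## the span of the unpenalized GEP solution. The toy problem below is
## constructed so that `span(B)` solves the GEP `M V = N V Lambda`, and
## `dist.subspace` is the package's subspace distance already used above.
## Kept as comments since top-level code in a package source file is
## evaluated at load time.
# set.seed(1)
# p <- 6; d <- 2
# N <- crossprod(matrix(rnorm(100 * p), 100, p)) / 100 # SPD right hand side
# B <- matrix(rnorm(p * d), p, d)
# M <- N %*% tcrossprod(B) %*% N # leading generalized eigenvectors span B
# fit <- CISE(M, N, d = d, Theta = 0)[[1]]
# dist.subspace(fit, qr.Q(qr(B)), normalize = TRUE) # should be ~0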