solve.gep <- function(A, B, d = nrow(A)) { isrB <- matpow(B, -0.5) if ((d < nrow(A)) && requireNamespace("RSpectra", quietly = TRUE)) { eig <- RSpectra::eigs_sym(isrB %*% A %*% isrB, d) } else { eig <- eigen(isrB %*% A %*% isrB, symmetric = TRUE) } list(vectors = isrB %*% eig$vectors[, 1:d, drop = FALSE], values = eig$values) } POI.lambda.max <- function(A, d = 1L, method = c('POI-C', 'POI-L', 'FastPOI-C', 'FastPOI-L')) { method <- match.arg(method) if (method %in% c('POI-C', 'POI-L')) { A2 <- apply(apply(A^2, 2, sort, decreasing = TRUE), 2, cumsum) lambda.max <- sqrt(apply(A2[1:d, , drop = FALSE], 1, max)) if (method == 'POI-C') { lambda.max[d] } else { lambda.max[1] } } else { if (requireNamespace("RSpectra", quietly = TRUE)) { vec <- RSpectra::eigs_sym(A, d)$vectors } else { vec <- eigen(A, symmetric = TRUE)$vectors } if (method == 'FastPOI-C') { sqrt(max(rowSums(vec^2))) } else { # 'FastPOI-L' max(abs(vec)) } } } #' Penalysed Orthogonal Iteration. #' #' @param A Left hand side of GEP #' @param B right hand side of GEP #' @param d number of eigen-vectors, -values to be computed coresponding to the #' largest \eqn{d} eigenvalues of the penalized GEP #' @param sparsity scaling for max penalty term in [0, 1) where 0 corresponds to #' no penalization and 1 leads to the trivial solution. (default: 1 / 2) #' @param method ether \code{"POI-C"} or \code{"FastPOI-C"} where #' POI-C: Penalized Orthogonal Iteration with Coordinate-wise Lasso penalty #' FastPOI-C: Fast POI with Coordinate-wise Lasso penalty #' where the Coordinate-wise Lasso is a group Lasso penalty. #' @param iter.outer maximum number of orthogonal iterations (ignored by Fast #' methods) #' @param iter.inner maximum number of inner iterations #' @param tol numerical tolerance. Absolute values smaller than \code{tol} are #' treated as 0. #' #' @export POI <- function(A, B, d = 1L, sparsity = 0.5, method = c('POI-C', 'FastPOI-C'), # TODO: Maybe implement Lasso penalty too iter.outer = 100L, iter.inner = 500L, tol = sqrt(.Machine$double.eps), use.C = FALSE ) { method <- match.arg(method) # Compute penalty parameter lambda lambda <- sparsity * POI.lambda.max(A, d, method) # Ensure RHS B to be positive definite if (missing(B)) { B <- diag(nrow(A)) } else { rankB <- qr(B, tol)$rank if (rankB < nrow(B)) { diag(B) <- diag(B) + log(nrow(B)) / rankB } } # In case of zero penalty compute ordinary GEP solution if (lambda == 0) { eig <- solve.eig(A, B, d) return(structure(list( U = eig$vectors, d = eig$values, lambda = 0, call = match.call() ), class = c("tensor_predictor", "POI"))) } # Set initial values if (requireNamespace("RSpectra", quietly = TRUE) && nrow(A) < d) { Delta <- RSpectra::eigs_sym(A, d)$vectors } else { Delta <- eigen(A, symmetric = TRUE)$vectors[, 1:d, drop = FALSE] } # In case of fast POI, only one iteration if (startsWith(method, 'Fast')) { # Step 1: Optimize (inner loop, a.k.a. coordinate wise penalization) if (use.C) { Z <- .Call('FastPOI_C_sub', B, Delta, lambda, as.integer(iter.inner), tol, PACKAGE = 'tensorPredictors') } else { # Initial value Z <- Delta # Note, the R implementation does NOT use a cyclic update instead # performs coordinate penalization in parallel for (j in seq_len(iter.inner)) { Z.last <- Z # for break condition # TODO: it seems (in general) that the cyclic update is actually needed! traces <- Delta - B %*% Z + diag(B) * Z Z <- traces * (pmax(1 - lambda / sqrt(rowSums(traces^2)), 0) / diag(B)) # Inner break condition (second condition safeguards against devergence) diff <- norm(Z.last - Z, 'F') if (diff < tol || 1 < tol * diff) { break } } } # Step 2: QR decomposition (same as below) if (d == 1L) { Z.norm <- norm(Z, 'F') if (Z.norm < tol) { Q <- matrix(0, p, d) } else { Q <- Z / Z.norm } } else { # Detect zero columns. zero.col <- colSums(abs(Z)) < tol if (all(zero.col)) { Q <- matrix(0, p, d) } else if (any(zero.col)) { Q <- matrix(0, p, d) Q[, !zero.col] <- qr.Q(qr(Z[, !zero.col])) } else { Q <- qr.Q(qr(Z)) } } } else { # POI-C Q <- Delta Z <- matrix(0, nrow(Q), ncol(Q)) # Outer loop (iteration) for (i in seq_len(iter.outer)) { Q.last <- Q # for break condition # Step 1: Solve B Z_i = A Q_{i-1} for Z_i Delta <- crossprod(A, Q) # Inner Loop if (use.C) { Z <- .Call('FastPOI_C_sub', B, Delta, lambda, as.integer(iter.inner), tol, PACKAGE = 'tensorPredictors') } else { # Note, the R implementation does NOT use a cyclic update instead # performs coordinate penalization in parallel for (j in seq_len(iter.inner)) { Z.last <- Z # for break condition # TODO: it seems (in general) that the cyclic update is actually needed! traces <- Delta - B %*% Z + diag(B) * Z Z <- traces * (pmax(1 - lambda / sqrt(rowSums(traces^2)), 0) / diag(B)) # Inner break condition (second condition safeguards against devergence) diff <- norm(Z.last - Z, 'F') if (diff < tol || 1 < tol * diff) { break } } } # Step 2: QR decomposition of Z_i = Q_i R_i. if (d == 1L) { Z.norm <- norm(Z, 'F') if (Z.norm < tol) { Q <- matrix(0, p, d) } else { Q <- Z / Z.norm } } else { # Detect zero columns. zero.col <- colSums(abs(Z)) < tol if (all(zero.col)) { Q <- matrix(0, nrow(Z), ncol(Z)) } else if (any(zero.col)) { Q <- matrix(0, nrow(Z), ncol(Z)) Q[, !zero.col] <- qr.Q(qr(Z[, !zero.col])) } else { Q <- qr.Q(qr(Z)) } } # Outer break condition || Q Q' - Q.last Q.last' || < tol # The used form is equivalent but much faster for d << p # The following holds in general for two matrices A, B of dim p x d # || A A' - B B' ||^2 = || A' A ||^2 - 2 || A' B ||^2 + || B' B ||^2 # for the Frobenius norm ||.||. The computational cost of the left side # is O(p^2 d) while the right side has O(p d^2). tr <- sum(crossprod(Q)^2) - 2 * sum(crossprod(Q, Q.last)^2) + sum(crossprod(Q.last)^2) if (sqrt(max(0, tr)) < tol) { break } } } # Reconstruct solution of the original GEP by solving # (Q' A Q) T = (Q' B Q) T D # for T and D which gives the solution U = Q T and Lambda = D. if (1 < d) { eig <- solve.gep(crossprod(Q, A) %*% Q, crossprod(Q, B) %*% Q) vectors <- Q %*% eig$vectors values <- eig$values } else { vectors <- Q values <- c((crossprod(Q, A) %*% Q) / (crossprod(Q, B) %*% Q)) } structure(list( vectors = vectors, values = values, lambda = lambda, call = match.call() ), class = c("tensor_predictor", "POI")) }