231 lines
		
	
	
		
			8.1 KiB
		
	
	
	
		
			R
		
	
	
	
	
	
			
		
		
	
	
			231 lines
		
	
	
		
			8.1 KiB
		
	
	
	
		
			R
		
	
	
	
	
	
solve.gep <- function(A, B, d = nrow(A)) {
 | 
						|
    isrB <- matpow(B, -0.5)
 | 
						|
 | 
						|
    if ((d < nrow(A)) && requireNamespace("RSpectra", quietly = TRUE)) {
 | 
						|
        eig <- RSpectra::eigs_sym(isrB %*% A %*% isrB, d)
 | 
						|
    } else {
 | 
						|
        eig <- eigen(isrB %*% A %*% isrB, symmetric = TRUE)
 | 
						|
    }
 | 
						|
 | 
						|
    list(vectors = isrB %*% eig$vectors[, 1:d, drop = FALSE], values = eig$values)
 | 
						|
}
 | 
						|
 | 
						|
POI.lambda.max <- function(A, d = 1L, method = c('POI-C', 'POI-L', 'FastPOI-C', 'FastPOI-L')) {
 | 
						|
    method <- match.arg(method)
 | 
						|
 | 
						|
    if (method %in% c('POI-C', 'POI-L')) {
 | 
						|
        A2 <- apply(apply(A^2, 2, sort, decreasing = TRUE), 2, cumsum)
 | 
						|
        lambda.max <- sqrt(apply(A2[1:d, , drop = FALSE], 1, max))
 | 
						|
        if (method == 'POI-C') {
 | 
						|
            lambda.max[d]
 | 
						|
        } else {
 | 
						|
            lambda.max[1]
 | 
						|
        }
 | 
						|
    } else {
 | 
						|
        if (requireNamespace("RSpectra", quietly = TRUE)) {
 | 
						|
            vec <- RSpectra::eigs_sym(A, d)$vectors
 | 
						|
        } else {
 | 
						|
            vec <- eigen(A, symmetric = TRUE)$vectors
 | 
						|
        }
 | 
						|
        if (method == 'FastPOI-C') {
 | 
						|
            sqrt(max(rowSums(vec^2)))
 | 
						|
        } else { # 'FastPOI-L'
 | 
						|
            max(abs(vec))
 | 
						|
        }
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
#' Penalysed Orthogonal Iteration.
 | 
						|
#'
 | 
						|
#' @param A Left hand side of GEP
 | 
						|
#' @param B right hand side of GEP
 | 
						|
#' @param d number of eigen-vectors, -values to be computed coresponding to the
 | 
						|
#'  largest \eqn{d} eigenvalues of the penalized GEP
 | 
						|
#' @param sparsity scaling for max penalty term in [0, 1) where 0 corresponds to
 | 
						|
#'  no penalization and 1 leads to the trivial solution. (default: 1 / 2)
 | 
						|
#' @param method ether \code{"POI-C"} or \code{"FastPOI-C"} where
 | 
						|
#'      POI-C: Penalized Orthogonal Iteration with Coordinate-wise Lasso penalty
 | 
						|
#'      FastPOI-C: Fast POI with Coordinate-wise Lasso penalty
 | 
						|
#'  where the Coordinate-wise Lasso is a group Lasso penalty.
 | 
						|
#' @param iter.outer maximum number of orthogonal iterations (ignored by Fast
 | 
						|
#'  methods)
 | 
						|
#' @param iter.inner maximum number of inner iterations
 | 
						|
#' @param tol numerical tolerance. Absolute values smaller than \code{tol} are
 | 
						|
#'  treated as 0.
 | 
						|
#'
 | 
						|
#' @export
 | 
						|
POI <- function(A, B, d = 1L, sparsity = 0.5,
 | 
						|
    method = c('POI-C', 'FastPOI-C'), # TODO: Maybe implement Lasso penalty too
 | 
						|
    iter.outer = 100L, iter.inner = 500L,
 | 
						|
    tol = sqrt(.Machine$double.eps),
 | 
						|
    use.C = FALSE
 | 
						|
) {
 | 
						|
    method <- match.arg(method)
 | 
						|
 | 
						|
    # Compute penalty parameter lambda
 | 
						|
    lambda <- sparsity * POI.lambda.max(A, d, method)
 | 
						|
 | 
						|
    # Ensure RHS B to be positive definite
 | 
						|
    if (missing(B)) {
 | 
						|
        B <- diag(nrow(A))
 | 
						|
    } else {
 | 
						|
        rankB <- qr(B, tol)$rank
 | 
						|
        if (rankB < nrow(B)) {
 | 
						|
            diag(B) <- diag(B) + log(nrow(B)) / rankB
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    # In case of zero penalty compute ordinary GEP solution
 | 
						|
    if (lambda == 0) {
 | 
						|
        eig <- solve.eig(A, B, d)
 | 
						|
        return(structure(list(
 | 
						|
            U = eig$vectors, d = eig$values,
 | 
						|
            lambda = 0, call = match.call()
 | 
						|
        ), class = c("tensor_predictor", "POI")))
 | 
						|
    }
 | 
						|
 | 
						|
    # Set initial values
 | 
						|
    if (requireNamespace("RSpectra", quietly = TRUE) && nrow(A) < d) {
 | 
						|
        Delta <- RSpectra::eigs_sym(A, d)$vectors
 | 
						|
    } else {
 | 
						|
        Delta <- eigen(A, symmetric = TRUE)$vectors[, 1:d, drop = FALSE]
 | 
						|
    }
 | 
						|
 | 
						|
    # In case of fast POI, only one iteration
 | 
						|
    if (startsWith(method, 'Fast')) {
 | 
						|
        # Step 1: Optimize (inner loop, a.k.a. coordinate wise penalization)
 | 
						|
        if (use.C) {
 | 
						|
            Z <- .Call('FastPOI_C_sub', B, Delta, lambda,
 | 
						|
                    as.integer(iter.inner), tol,
 | 
						|
                    PACKAGE = 'tensorPredictors')
 | 
						|
        } else {
 | 
						|
            # Initial value
 | 
						|
            Z <- Delta
 | 
						|
 | 
						|
            # Note, the R implementation does NOT use a cyclic update instead
 | 
						|
            # performs coordinate penalization in parallel
 | 
						|
            for (j in seq_len(iter.inner)) {
 | 
						|
                Z.last <- Z # for break condition
 | 
						|
 | 
						|
                # TODO: it seems (in general) that the cyclic update is actually needed!
 | 
						|
                traces <- Delta - B %*% Z + diag(B) * Z
 | 
						|
                Z <- traces * (pmax(1 - lambda / sqrt(rowSums(traces^2)), 0) / diag(B))
 | 
						|
 | 
						|
                # Inner break condition (second condition safeguards against devergence)
 | 
						|
                diff <- norm(Z.last - Z, 'F')
 | 
						|
                if (diff < tol || 1 < tol * diff) {
 | 
						|
                    break
 | 
						|
                }
 | 
						|
            }
 | 
						|
        }
 | 
						|
 | 
						|
        # Step 2: QR decomposition (same as below)
 | 
						|
        if (d == 1L) {
 | 
						|
            Z.norm <- norm(Z, 'F')
 | 
						|
            if (Z.norm < tol) {
 | 
						|
                Q <- matrix(0, p, d)
 | 
						|
            } else {
 | 
						|
                Q <- Z / Z.norm
 | 
						|
            }
 | 
						|
        } else {
 | 
						|
            # Detect zero columns.
 | 
						|
            zero.col <- colSums(abs(Z)) < tol
 | 
						|
            if (all(zero.col)) {
 | 
						|
                Q <- matrix(0, p, d)
 | 
						|
            } else if (any(zero.col)) {
 | 
						|
                Q <- matrix(0, p, d)
 | 
						|
                Q[, !zero.col] <- qr.Q(qr(Z[, !zero.col]))
 | 
						|
            } else {
 | 
						|
                Q <- qr.Q(qr(Z))
 | 
						|
            }
 | 
						|
        }
 | 
						|
    } else { # POI-C
 | 
						|
        Q <- Delta
 | 
						|
        Z <- matrix(0, nrow(Q), ncol(Q))
 | 
						|
 | 
						|
        # Outer loop (iteration)
 | 
						|
        for (i in seq_len(iter.outer)) {
 | 
						|
            Q.last <- Q # for break condition
 | 
						|
 | 
						|
            # Step 1: Solve B Z_i = A Q_{i-1} for Z_i
 | 
						|
            Delta <- crossprod(A, Q)
 | 
						|
 | 
						|
            # Inner Loop
 | 
						|
            if (use.C) {
 | 
						|
                Z <- .Call('FastPOI_C_sub', B, Delta, lambda,
 | 
						|
                        as.integer(iter.inner), tol,
 | 
						|
                        PACKAGE = 'tensorPredictors')
 | 
						|
            } else {
 | 
						|
                # Note, the R implementation does NOT use a cyclic update instead
 | 
						|
                # performs coordinate penalization in parallel
 | 
						|
                for (j in seq_len(iter.inner)) {
 | 
						|
                    Z.last <- Z # for break condition
 | 
						|
 | 
						|
                    # TODO: it seems (in general) that the cyclic update is actually needed!
 | 
						|
                    traces <- Delta - B %*% Z + diag(B) * Z
 | 
						|
                    Z <- traces * (pmax(1 - lambda / sqrt(rowSums(traces^2)), 0) / diag(B))
 | 
						|
 | 
						|
                    # Inner break condition (second condition safeguards against devergence)
 | 
						|
                    diff <- norm(Z.last - Z, 'F')
 | 
						|
                    if (diff < tol || 1 < tol * diff) {
 | 
						|
                        break
 | 
						|
                    }
 | 
						|
                }
 | 
						|
            }
 | 
						|
 | 
						|
            # Step 2: QR decomposition of Z_i = Q_i R_i.
 | 
						|
            if (d == 1L) {
 | 
						|
                Z.norm <- norm(Z, 'F')
 | 
						|
                if (Z.norm < tol) {
 | 
						|
                    Q <- matrix(0, p, d)
 | 
						|
                } else {
 | 
						|
                    Q <- Z / Z.norm
 | 
						|
                }
 | 
						|
            } else {
 | 
						|
                # Detect zero columns.
 | 
						|
                zero.col <- colSums(abs(Z)) < tol
 | 
						|
                if (all(zero.col)) {
 | 
						|
                    Q <- matrix(0, nrow(Z), ncol(Z))
 | 
						|
                } else if (any(zero.col)) {
 | 
						|
                    Q <- matrix(0, nrow(Z), ncol(Z))
 | 
						|
                    Q[, !zero.col] <- qr.Q(qr(Z[, !zero.col]))
 | 
						|
                } else {
 | 
						|
                    Q <- qr.Q(qr(Z))
 | 
						|
                }
 | 
						|
            }
 | 
						|
 | 
						|
            # Outer break condition || Q Q' - Q.last Q.last' || < tol
 | 
						|
            # The used form is equivalent but much faster for d << p
 | 
						|
            # The following holds in general for two matrices A, B of dim p x d
 | 
						|
            #   || A A' - B B' ||^2 = || A' A ||^2 - 2 || A' B ||^2 + || B' B ||^2
 | 
						|
            # for the Frobenius norm ||.||. The computational cost of the left side
 | 
						|
            # is O(p^2 d) while the right side has O(p d^2).
 | 
						|
            tr <- sum(crossprod(Q)^2) -
 | 
						|
            2 * sum(crossprod(Q, Q.last)^2) +
 | 
						|
                sum(crossprod(Q.last)^2)
 | 
						|
            if (sqrt(max(0, tr)) < tol) {
 | 
						|
                break
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    # Reconstruct solution of the original GEP by solving
 | 
						|
    #   (Q' A Q) T = (Q' B Q) T D
 | 
						|
    # for T and D which gives the solution U = Q T and Lambda = D.
 | 
						|
    if (1 < d) {
 | 
						|
        eig <- solve.gep(crossprod(Q, A) %*% Q, crossprod(Q, B) %*% Q)
 | 
						|
        vectors <- Q %*% eig$vectors
 | 
						|
        values <- eig$values
 | 
						|
    } else {
 | 
						|
        vectors <- Q
 | 
						|
        values <- c((crossprod(Q, A) %*% Q) / (crossprod(Q, B) %*% Q))
 | 
						|
    }
 | 
						|
 | 
						|
    structure(list(
 | 
						|
        vectors = vectors,
 | 
						|
        values = values,
 | 
						|
        lambda = lambda,
 | 
						|
        call = match.call()
 | 
						|
    ), class = c("tensor_predictor", "POI"))
 | 
						|
}
 |