tensor_predictors/tensorPredictors/R/POI.R

solve.gep <- function(A, B, d = nrow(A)) {
    isrB <- matpow(B, -0.5)

    if ((d < nrow(A)) && requireNamespace("RSpectra", quietly = TRUE)) {
        eig <- RSpectra::eigs_sym(isrB %*% A %*% isrB, d)
    } else {
        eig <- eigen(isrB %*% A %*% isrB, symmetric = TRUE)
    }

    list(vectors = isrB %*% eig$vectors[, 1:d, drop = FALSE], values = eig$values)
}

POI.lambda.max <- function(A, d = 1L, method = c('POI-C', 'POI-L', 'FastPOI-C', 'FastPOI-L')) {
    method <- match.arg(method)

    if (method %in% c('POI-C', 'POI-L')) {
        A2 <- apply(apply(A^2, 2, sort, decreasing = TRUE), 2, cumsum)
        lambda.max <- sqrt(apply(A2[1:d, , drop = FALSE], 1, max))
        if (method == 'POI-C') {
            lambda.max[d]
        } else {
            lambda.max[1]
        }
    } else {
        if (requireNamespace("RSpectra", quietly = TRUE)) {
            vec <- RSpectra::eigs_sym(A, d)$vectors
        } else {
            vec <- eigen(A, symmetric = TRUE)$vectors
        }
        if (method == 'FastPOI-C') {
            sqrt(max(rowSums(vec^2)))
        } else { # 'FastPOI-L'
            max(abs(vec))
        }
    }
}

#' Penalysed Orthogonal Iteration.
#'
#' @param A Left hand side of GEP
#' @param B right hand side of GEP
#' @param d number of eigen-vectors, -values to be computed coresponding to the
#'  largest \eqn{d} eigenvalues of the penalized GEP
#' @param sparsity scaling for max penalty term in [0, 1) where 0 corresponds to
#'  no penalization and 1 leads to the trivial solution. (default: 1 / 2)
#' @param method ether \code{"POI-C"} or \code{"FastPOI-C"} where
#'      POI-C: Penalized Orthogonal Iteration with Coordinate-wise Lasso penalty
#'      FastPOI-C: Fast POI with Coordinate-wise Lasso penalty
#'  where the Coordinate-wise Lasso is a group Lasso penalty.
#' @param iter.outer maximum number of orthogonal iterations (ignored by Fast
#'  methods)
#' @param iter.inner maximum number of inner iterations
#' @param tol numerical tolerance. Absolute values smaller than \code{tol} are
#'  treated as 0.
#'
#' @export
POI <- function(A, B, d = 1L, sparsity = 0.5,
    method = c('POI-C', 'FastPOI-C'), # TODO: Maybe implement Lasso penalty too
    iter.outer = 100L, iter.inner = 500L,
    tol = sqrt(.Machine$double.eps),
    use.C = FALSE
) {
    method <- match.arg(method)

    # Compute penalty parameter lambda
    lambda <- sparsity * POI.lambda.max(A, d, method)

    # Ensure RHS B to be positive definite
    if (missing(B)) {
        B <- diag(nrow(A))
    } else {
        rankB <- qr(B, tol)$rank
        if (rankB < nrow(B)) {
            diag(B) <- diag(B) + log(nrow(B)) / rankB
        }
    }

    # In case of zero penalty compute ordinary GEP solution
    if (lambda == 0) {
        eig <- solve.eig(A, B, d)
        return(structure(list(
            U = eig$vectors, d = eig$values,
            lambda = 0, call = match.call()
        ), class = c("tensor_predictor", "POI")))
    }

    # Set initial values
    if (requireNamespace("RSpectra", quietly = TRUE) && nrow(A) < d) {
        Delta <- RSpectra::eigs_sym(A, d)$vectors
    } else {
        Delta <- eigen(A, symmetric = TRUE)$vectors[, 1:d, drop = FALSE]
    }

    # In case of fast POI, only one iteration
    if (startsWith(method, 'Fast')) {
        # Step 1: Optimize (inner loop, a.k.a. coordinate wise penalization)
        if (use.C) {
            Z <- .Call('FastPOI_C_sub', B, Delta, lambda,
                    as.integer(iter.inner), tol,
                    PACKAGE = 'tensorPredictors')
        } else {
            # Initial value
            Z <- Delta

            # Note, the R implementation does NOT use a cyclic update instead
            # performs coordinate penalization in parallel
            for (j in seq_len(iter.inner)) {
                Z.last <- Z # for break condition

                # TODO: it seems (in general) that the cyclic update is actually needed!
                traces <- Delta - B %*% Z + diag(B) * Z
                Z <- traces * (pmax(1 - lambda / sqrt(rowSums(traces^2)), 0) / diag(B))

                # Inner break condition (second condition safeguards against devergence)
                diff <- norm(Z.last - Z, 'F')
                if (diff < tol || 1 < tol * diff) {
                    break
                }
            }
        }

        # Step 2: QR decomposition (same as below)
        if (d == 1L) {
            Z.norm <- norm(Z, 'F')
            if (Z.norm < tol) {
                Q <- matrix(0, p, d)
            } else {
                Q <- Z / Z.norm
            }
        } else {
            # Detect zero columns.
            zero.col <- colSums(abs(Z)) < tol
            if (all(zero.col)) {
                Q <- matrix(0, p, d)
            } else if (any(zero.col)) {
                Q <- matrix(0, p, d)
                Q[, !zero.col] <- qr.Q(qr(Z[, !zero.col]))
            } else {
                Q <- qr.Q(qr(Z))
            }
        }
    } else { # POI-C
        Q <- Delta
        Z <- matrix(0, nrow(Q), ncol(Q))

        # Outer loop (iteration)
        for (i in seq_len(iter.outer)) {
            Q.last <- Q # for break condition

            # Step 1: Solve B Z_i = A Q_{i-1} for Z_i
            Delta <- crossprod(A, Q)

            # Inner Loop
            if (use.C) {
                Z <- .Call('FastPOI_C_sub', B, Delta, lambda,
                        as.integer(iter.inner), tol,
                        PACKAGE = 'tensorPredictors')
            } else {
                # Note, the R implementation does NOT use a cyclic update instead
                # performs coordinate penalization in parallel
                for (j in seq_len(iter.inner)) {
                    Z.last <- Z # for break condition

                    # TODO: it seems (in general) that the cyclic update is actually needed!
                    traces <- Delta - B %*% Z + diag(B) * Z
                    Z <- traces * (pmax(1 - lambda / sqrt(rowSums(traces^2)), 0) / diag(B))

                    # Inner break condition (second condition safeguards against devergence)
                    diff <- norm(Z.last - Z, 'F')
                    if (diff < tol || 1 < tol * diff) {
                        break
                    }
                }
            }

            # Step 2: QR decomposition of Z_i = Q_i R_i.
            if (d == 1L) {
                Z.norm <- norm(Z, 'F')
                if (Z.norm < tol) {
                    Q <- matrix(0, p, d)
                } else {
                    Q <- Z / Z.norm
                }
            } else {
                # Detect zero columns.
                zero.col <- colSums(abs(Z)) < tol
                if (all(zero.col)) {
                    Q <- matrix(0, nrow(Z), ncol(Z))
                } else if (any(zero.col)) {
                    Q <- matrix(0, nrow(Z), ncol(Z))
                    Q[, !zero.col] <- qr.Q(qr(Z[, !zero.col]))
                } else {
                    Q <- qr.Q(qr(Z))
                }
            }

            # Outer break condition || Q Q' - Q.last Q.last' || < tol
            # The used form is equivalent but much faster for d << p
            # The following holds in general for two matrices A, B of dim p x d
            #   || A A' - B B' ||^2 = || A' A ||^2 - 2 || A' B ||^2 + || B' B ||^2
            # for the Frobenius norm ||.||. The computational cost of the left side
            # is O(p^2 d) while the right side has O(p d^2).
            tr <- sum(crossprod(Q)^2) -
            2 * sum(crossprod(Q, Q.last)^2) +
                sum(crossprod(Q.last)^2)
            if (sqrt(max(0, tr)) < tol) {
                break
            }
        }
    }

    # Reconstruct solution of the original GEP by solving
    #   (Q' A Q) T = (Q' B Q) T D
    # for T and D which gives the solution U = Q T and Lambda = D.
    if (1 < d) {
        eig <- solve.gep(crossprod(Q, A) %*% Q, crossprod(Q, B) %*% Q)
        vectors <- Q %*% eig$vectors
        values <- eig$values
    } else {
        vectors <- Q
        values <- c((crossprod(Q, A) %*% Q) / (crossprod(Q, B) %*% Q))
    }

    structure(list(
        vectors = vectors,
        values = values,
        lambda = lambda,
        call = match.call()
    ), class = c("tensor_predictor", "POI"))
}
wip: POI 2021-11-12 17:22:45 +00:00			`solve.gep <- function(A, B, d = nrow(A)) {`
			`isrB <- matpow(B, -0.5)`

wip: poi, add: projection dist (sin of max principal angle) 2021-11-16 11:01:12 +00:00			`if ((d < nrow(A)) && requireNamespace("RSpectra", quietly = TRUE)) {`
wip: POI 2021-11-12 17:22:45 +00:00			`eig <- RSpectra::eigs_sym(isrB %% A %% isrB, d)`
			`} else {`
			`eig <- eigen(isrB %% A %% isrB, symmetric = TRUE)`
			`}`

wip: poi, add: projection dist (sin of max principal angle) 2021-11-16 11:01:12 +00:00			`list(vectors = isrB %*% eig$vectors[, 1:d, drop = FALSE], values = eig$values)`
wip: POI 2021-11-12 17:22:45 +00:00			`}`

			`POI.lambda.max <- function(A, d = 1L, method = c('POI-C', 'POI-L', 'FastPOI-C', 'FastPOI-L')) {`
			`method <- match.arg(method)`

			`if (method %in% c('POI-C', 'POI-L')) {`
			`A2 <- apply(apply(A^2, 2, sort, decreasing = TRUE), 2, cumsum)`
			`lambda.max <- sqrt(apply(A2[1:d, , drop = FALSE], 1, max))`
			`if (method == 'POI-C') {`
			`lambda.max[d]`
			`} else {`
			`lambda.max[1]`
			`}`
			`} else {`
			`if (requireNamespace("RSpectra", quietly = TRUE)) {`
			`vec <- RSpectra::eigs_sym(A, d)$vectors`
			`} else {`
			`vec <- eigen(A, symmetric = TRUE)$vectors`
			`}`
			`if (method == 'FastPOI-C') {`
			`sqrt(max(rowSums(vec^2)))`
			`} else { # 'FastPOI-L'`
			`max(abs(vec))`
			`}`
			`}`
			`}`

add (initial): source 2020-06-10 14:35:27 +00:00			`#' Penalysed Orthogonal Iteration.`
			`#'`
wip: poi, add: projection dist (sin of max principal angle) 2021-11-16 11:01:12 +00:00			`#' @param A Left hand side of GEP`
			`#' @param B right hand side of GEP`
			`#' @param d number of eigen-vectors, -values to be computed coresponding to the`
			`#' largest \eqn{d} eigenvalues of the penalized GEP`
			`#' @param sparsity scaling for max penalty term in [0, 1) where 0 corresponds to`
			`#' no penalization and 1 leads to the trivial solution. (default: 1 / 2)`
			`#' @param method ether \code{"POI-C"} or \code{"FastPOI-C"} where`
			`#' POI-C: Penalized Orthogonal Iteration with Coordinate-wise Lasso penalty`
			`#' FastPOI-C: Fast POI with Coordinate-wise Lasso penalty`
			`#' where the Coordinate-wise Lasso is a group Lasso penalty.`
			`#' @param iter.outer maximum number of orthogonal iterations (ignored by Fast`
			`#' methods)`
			`#' @param iter.inner maximum number of inner iterations`
			`#' @param tol numerical tolerance. Absolute values smaller than \code{tol} are`
			`#' treated as 0.`
wip: convert to R-package 2021-10-29 16:16:40 +00:00			`#'`
			`#' @export`
wip: POI 2021-11-12 17:22:45 +00:00			`POI <- function(A, B, d = 1L, sparsity = 0.5,`
wip: poi, add: projection dist (sin of max principal angle) 2021-11-16 11:01:12 +00:00			`method = c('POI-C', 'FastPOI-C'), # TODO: Maybe implement Lasso penalty too`
wip: POI 2021-11-12 17:22:45 +00:00			`iter.outer = 100L, iter.inner = 500L,`
wip: poi, add: projection dist (sin of max principal angle) 2021-11-16 11:01:12 +00:00			`tol = sqrt(.Machine$double.eps),`
			`use.C = FALSE`
wip: POI 2021-11-12 17:22:45 +00:00			`) {`
			`method <- match.arg(method)`

			`# Compute penalty parameter lambda`
			`lambda <- sparsity * POI.lambda.max(A, d, method)`

			`# Ensure RHS B to be positive definite`
			`if (missing(B)) {`
			`B <- diag(nrow(A))`
			`} else {`
			`rankB <- qr(B, tol)$rank`
			`if (rankB < nrow(B)) {`
			`diag(B) <- diag(B) + log(nrow(B)) / rankB`
			`}`
			`}`

			`# In case of zero penalty compute ordinary GEP solution`
			`if (lambda == 0) {`
			`eig <- solve.eig(A, B, d)`
			`return(structure(list(`
			`U = eig$vectors, d = eig$values,`
			`lambda = 0, call = match.call()`
			`), class = c("tensor_predictor", "POI")))`
			`}`

			`# Set initial values`
			`if (requireNamespace("RSpectra", quietly = TRUE) && nrow(A) < d) {`
wip: convert to package 2021-11-04 12:05:15 +00:00			`Delta <- RSpectra::eigs_sym(A, d)$vectors`
add (initial): source 2020-06-10 14:35:27 +00:00			`} else {`
wip: convert to package 2021-11-04 12:05:15 +00:00			`Delta <- eigen(A, symmetric = TRUE)$vectors[, 1:d, drop = FALSE]`
add (initial): source 2020-06-10 14:35:27 +00:00			`}`
wip: POI 2021-11-12 17:22:45 +00:00
wip: poi, add: projection dist (sin of max principal angle) 2021-11-16 11:01:12 +00:00			`# In case of fast POI, only one iteration`
			`if (startsWith(method, 'Fast')) {`
			`# Step 1: Optimize (inner loop, a.k.a. coordinate wise penalization)`
			`if (use.C) {`
			`Z <- .Call('FastPOI_C_sub', B, Delta, lambda,`
			`as.integer(iter.inner), tol,`
			`PACKAGE = 'tensorPredictors')`
			`} else {`
			`# Initial value`
			`Z <- Delta`

			`# Note, the R implementation does NOT use a cyclic update instead`
			`# performs coordinate penalization in parallel`
			`for (j in seq_len(iter.inner)) {`
			`Z.last <- Z # for break condition`

			`# TODO: it seems (in general) that the cyclic update is actually needed!`
			`traces <- Delta - B %% Z + diag(B) Z`
			`Z <- traces * (pmax(1 - lambda / sqrt(rowSums(traces^2)), 0) / diag(B))`

			`# Inner break condition (second condition safeguards against devergence)`
			`diff <- norm(Z.last - Z, 'F')`
			`if (diff < tol \|\| 1 < tol * diff) {`
			`break`
			`}`
add (initial): source 2020-06-10 14:35:27 +00:00			`}`
			`}`

wip: poi, add: projection dist (sin of max principal angle) 2021-11-16 11:01:12 +00:00			`# Step 2: QR decomposition (same as below)`
wip: POI 2021-11-12 17:22:45 +00:00			`if (d == 1L) {`
			`Z.norm <- norm(Z, 'F')`
			`if (Z.norm < tol) {`
			`Q <- matrix(0, p, d)`
			`} else {`
			`Q <- Z / Z.norm`
			`}`
add (initial): source 2020-06-10 14:35:27 +00:00			`} else {`
wip: POI 2021-11-12 17:22:45 +00:00			`# Detect zero columns.`
			`zero.col <- colSums(abs(Z)) < tol`
			`if (all(zero.col)) {`
			`Q <- matrix(0, p, d)`
			`} else if (any(zero.col)) {`
			`Q <- matrix(0, p, d)`
			`Q[, !zero.col] <- qr.Q(qr(Z[, !zero.col]))`
			`} else {`
			`Q <- qr.Q(qr(Z))`
			`}`
add (initial): source 2020-06-10 14:35:27 +00:00			`}`
wip: poi, add: projection dist (sin of max principal angle) 2021-11-16 11:01:12 +00:00			`} else { # POI-C`
			`Q <- Delta`
			`Z <- matrix(0, nrow(Q), ncol(Q))`

			`# Outer loop (iteration)`
			`for (i in seq_len(iter.outer)) {`
			`Q.last <- Q # for break condition`

			`# Step 1: Solve B Z_i = A Q_{i-1} for Z_i`
			`Delta <- crossprod(A, Q)`

			`# Inner Loop`
			`if (use.C) {`
			`Z <- .Call('FastPOI_C_sub', B, Delta, lambda,`
			`as.integer(iter.inner), tol,`
			`PACKAGE = 'tensorPredictors')`
			`} else {`
			`# Note, the R implementation does NOT use a cyclic update instead`
			`# performs coordinate penalization in parallel`
			`for (j in seq_len(iter.inner)) {`
			`Z.last <- Z # for break condition`

			`# TODO: it seems (in general) that the cyclic update is actually needed!`
			`traces <- Delta - B %% Z + diag(B) Z`
			`Z <- traces * (pmax(1 - lambda / sqrt(rowSums(traces^2)), 0) / diag(B))`

			`# Inner break condition (second condition safeguards against devergence)`
			`diff <- norm(Z.last - Z, 'F')`
			`if (diff < tol \|\| 1 < tol * diff) {`
			`break`
			`}`
			`}`
			`}`
wip: POI 2021-11-12 17:22:45 +00:00
wip: poi, add: projection dist (sin of max principal angle) 2021-11-16 11:01:12 +00:00			`# Step 2: QR decomposition of Z_i = Q_i R_i.`
			`if (d == 1L) {`
			`Z.norm <- norm(Z, 'F')`
			`if (Z.norm < tol) {`
			`Q <- matrix(0, p, d)`
			`} else {`
			`Q <- Z / Z.norm`
			`}`
			`} else {`
			`# Detect zero columns.`
			`zero.col <- colSums(abs(Z)) < tol`
			`if (all(zero.col)) {`
			`Q <- matrix(0, nrow(Z), ncol(Z))`
			`} else if (any(zero.col)) {`
			`Q <- matrix(0, nrow(Z), ncol(Z))`
			`Q[, !zero.col] <- qr.Q(qr(Z[, !zero.col]))`
			`} else {`
			`Q <- qr.Q(qr(Z))`
			`}`
			`}`

			`# Outer break condition \|\| Q Q' - Q.last Q.last' \|\| < tol`
			`# The used form is equivalent but much faster for d << p`
			`# The following holds in general for two matrices A, B of dim p x d`
			`# \|\| A A' - B B' \|\|^2 = \|\| A' A \|\|^2 - 2 \|\| A' B \|\|^2 + \|\| B' B \|\|^2`
			`# for the Frobenius norm \|\|.\|\|. The computational cost of the left side`
			`# is O(p^2 d) while the right side has O(p d^2).`
			`tr <- sum(crossprod(Q)^2) -`
			`2 * sum(crossprod(Q, Q.last)^2) +`
			`sum(crossprod(Q.last)^2)`
			`if (sqrt(max(0, tr)) < tol) {`
			`break`
			`}`
add (initial): source 2020-06-10 14:35:27 +00:00			`}`
			`}`

wip: poi, add: projection dist (sin of max principal angle) 2021-11-16 11:01:12 +00:00			`# Reconstruct solution of the original GEP by solving`
			`# (Q' A Q) T = (Q' B Q) T D`
			`# for T and D which gives the solution U = Q T and Lambda = D.`
			`if (1 < d) {`
			`eig <- solve.gep(crossprod(Q, A) %% Q, crossprod(Q, B) %% Q)`
			`vectors <- Q %*% eig$vectors`
			`values <- eig$values`
			`} else {`
			`vectors <- Q`
			`values <- c((crossprod(Q, A) %% Q) / (crossprod(Q, B) %% Q))`
			`}`
wip: POI 2021-11-12 17:22:45 +00:00
			`structure(list(`
wip: poi, add: projection dist (sin of max principal angle) 2021-11-16 11:01:12 +00:00			`vectors = vectors,`
			`values = values,`
wip: POI 2021-11-12 17:22:45 +00:00			`lambda = lambda,`
			`call = match.call()`
			`), class = c("tensor_predictor", "POI"))`
add (initial): source 2020-06-10 14:35:27 +00:00			`}`