wip: poi, add: projection dist (sin of max principal angle)

2021-11-16 12:01:12 +01:00 · 2021-11-16 12:01:12 +01:00 · 9123baccdd
commit 9123baccdd
parent b0151dfafb
6 changed files with 306 additions and 117 deletions
--- a/simulations/simulation_poi.R
+++ b/simulations/simulation_poi.R
@ -0,0 +1,111 @@
 ################################################################################
 ###                 LDA (sparse Linear Discrimina Analysis)                  ###
 ################################################################################
 devtools::load_all('tensorPredictors/')
 C <- function(rho, p) {
    res <- matrix(rho, p, p)
    diag(res) <- 1
    res
 }
 R <- function(rho, p) {
    rho^abs(outer(1:p, 1:p, `-`))
 }
 dataset <- function(nr) {
    K <- 3          # Nr. Groups
    n.i <- 30       # Sample group size for each of the K groups
    n <- K * n.i    # Sample size
    p <- 200        # Nr. of predictors
    # Generate test data
    V <- cbind(matrix(c(
        2, 1, 2, 1, 2,
        1,-1, 1,-1, 1,
        0, 1,-1, 1, 0
    ), 3, 5, byrow = TRUE),
        matrix(0, 3, p - 5)
    )
    W <- cbind(matrix(c(
    -1, 1, 1, 1, 1,
        1,-1, 1,-1, 1,
        1, 1,-1, 1, 0
    ), 3, 5, byrow = TRUE),
        matrix(0, 3, p - 5)
    )
    if (nr == 1) {          # Model 1
        y <- factor(rep(1:K, each = n.i))
        X <- rmvnorm(n, mu = rep(0, p)) + V[y, ]
        B <- cbind(V[1, ] - V[2, ], V[2, ] - V[3, ])
    } else if (nr == 2) {   # Model 2
        y <- factor(rep(1:K, each = n.i))
        X <- rmvnorm(n, sigma = C(0.5, p)) + (V %*% C(0.5, p))[y, ]
        B <- cbind(V[1, ] - V[2, ], V[2, ] - V[3, ])
    } else if (nr == 3) {   # Model 3
        y <- factor(rep(1:K, each = n.i))
        X <- rmvnorm(n, sigma = R(0.5, p)) + (V %*% R(0.5, p))[y, ]
        B <- cbind(V[1, ] - V[2, ], V[2, ] - V[3, ])
    } else if (nr == 4) {   # Model 4
        y <- factor(rep(1:K, each = n.i))
        X <- rmvnorm(n, sigma = C(0.5, p)) + (W %*% C(0.5, p))[y, ]
        B <- cbind(W[1, ] - W[2, ], W[2, ] - W[3, ])
    } else if (nr == 5) {   # Model 5
        K <- 4
        n <- K * n.i
        W.tilde <- 2 * rbind(W, colMeans(W))
        mu.tilde <- W.tilde %*% C(0.5, p)
        y <- factor(rep(1:K, each = n.i))
        X <- rmvnorm(n, sigma = C(0.5, p)) + mu.tilde[y, ]
        B <- cbind(W[1, ] - W[2, ], W[2, ] - W[3, ])
    } else {
        stop("Unknown model nr.")
    }
    list(X = X, y = y, B = qr.Q(qr(B)))
 }
 # # Model 1
 # fit <- with(dataset(1), {
 #     with(GEP(X, y, 'lda'), POI(lhs, rhs, ncol(B), method = 'POI-C'))
 # })
 # fit <- with(dataset(1), {
 #     with(GEP(X, y, 'lda'), POI(lhs, rhs, ncol(B), method = 'FastPOI-C'))
 # })
 # fit <- with(dataset(1), {
 #     with(GEP(X, y, 'lda'), POI(lhs, rhs, ncol(B), method = 'POI-C', use.C = TRUE))
 # })
 # fit <- with(dataset(1), {
 #     with(GEP(X, y, 'lda'), POI(lhs, rhs, ncol(B), method = 'FastPOI-C', use.C = TRUE))
 # })
 # head(fit$vectors, 10)
 count <- 0
 nr.reps <- 100
 sim <- replicate(nr.reps, {
    res <- double(0)
    for (model.nr in 1:5) {
        for (method in c('POI-C', 'FastPOI-C')) {
            for (use.C in c(FALSE, TRUE)) {
                dist <- with(dataset(model.nr), {
                    fit <- with(GEP(X, y, 'lda'), {
                        POI(lhs, rhs, ncol(B), method = 'POI-C', use.C = use.C)
                    })
                    # dist.subspace(B, fit$vectors, is.ortho = FALSE, normalize = TRUE)
                    dist.projection(B, fit$vectors)
                })
                names(dist) <- paste('M', model.nr, '-', method, '-', use.C)
                res <- c(res, dist)
            }
        }
    }
    cat("Counter", (count <<- count + 1), "/", nr.reps, "\n")
    res
 })
 (stats <- as.matrix(rowMeans(sim)))
--- a/tensorPredictors/R/GEP.R
+++ b/tensorPredictors/R/GEP.R
@ -14,7 +14,7 @@
 #' Reduction and Variable Selection" By Xin Chen, Changliang Zou and
 #' R. Dennis Cook.
 #'
-GEP <- function(X, y, method = c('pfc', 'pca', 'sir', 'save'), ...,
+GEP <- function(X, y, method = c('pfc', 'pca', 'sir', 'lda'), ...,
    nr.slices = 10, ensamble = list(abs, identity, function(x) x^2)
 ) {
    method <- match.arg(method)
@ -52,6 +52,21 @@ GEP <- function(X, y, method = c('pfc', 'pca', 'sir', 'save'), ...,
        })))
        # Sample covariance
        rhs <- cov(X)
    } else if (method == 'lda') {
        # TODO: check this properly!!! (Maybe a bit better implementation and/or
        # some theoretical inaccuracies)
        y <- as.factor(y)
        # group means
        mu <- as.matrix(aggregate(X, list(y), mean)[, -1])
        # between group covariance Sigma.B = Cov(E[X | y])
        lhs <- ((nrow(mu) - 1) / nrow(mu)) * cov(mu)
        # within group covariance (Sigma.T = Sigma.B + Sigma.W)
        # with Sigma.W = E(Cov(X | y)) and Sigma.T = Cov(X)
        rhs <- (((nrow(X) - 1) / nrow(X)) * cov(X)) - lhs
    } else {
        stop('Not implemented!')
    }
--- a/tensorPredictors/R/POI.R
+++ b/tensorPredictors/R/POI.R
@ -1,13 +1,13 @@
 solve.gep <- function(A, B, d = nrow(A)) {
    isrB <- matpow(B, -0.5)
-    if (requireNamespace("RSpectra", quietly = TRUE) && d < nrow(A)) {
+    if ((d < nrow(A)) && requireNamespace("RSpectra", quietly = TRUE)) {
        eig <- RSpectra::eigs_sym(isrB %*% A %*% isrB, d)
    } else {
        eig <- eigen(isrB %*% A %*% isrB, symmetric = TRUE)
    }
-    list(vectors = isrB %*% eig$vectors, values = eig$values)
+    list(vectors = isrB %*% eig$vectors[, 1:d, drop = FALSE], values = eig$values)
 }
 POI.lambda.max <- function(A, d = 1L, method = c('POI-C', 'POI-L', 'FastPOI-C', 'FastPOI-L')) {
@ -37,16 +37,28 @@ POI.lambda.max <- function(A, d = 1L, method = c('POI-C', 'POI-L', 'FastPOI-C',
 #' Penalysed Orthogonal Iteration.
 #'
-#' @param lambda Default: 0.75 * lambda_max for FastPOI-C method.
+#' @param A Left hand side of GEP
-#'
+#' @param B right hand side of GEP
-#' @note use.C required 'poi.so' beeing dynamicaly loaded.
+#' @param d number of eigen-vectors, -values to be computed coresponding to the
-#'  dyn.load('../tensor_predictors/poi.so')
+#'  largest \eqn{d} eigenvalues of the penalized GEP
 #' @param sparsity scaling for max penalty term in [0, 1) where 0 corresponds to
 #'  no penalization and 1 leads to the trivial solution. (default: 1 / 2)
 #' @param method ether \code{"POI-C"} or \code{"FastPOI-C"} where
 #'      POI-C: Penalized Orthogonal Iteration with Coordinate-wise Lasso penalty
 #'      FastPOI-C: Fast POI with Coordinate-wise Lasso penalty
 #'  where the Coordinate-wise Lasso is a group Lasso penalty.
 #' @param iter.outer maximum number of orthogonal iterations (ignored by Fast
 #'  methods)
 #' @param iter.inner maximum number of inner iterations
 #' @param tol numerical tolerance. Absolute values smaller than \code{tol} are
 #'  treated as 0.
 #'
 #' @export
 POI <- function(A, B, d = 1L, sparsity = 0.5,
-    method = c('POI-C', 'FastPOI-C'), # TODO: Maybe implement the the lasso loss too
+    method = c('POI-C', 'FastPOI-C'), # TODO: Maybe implement Lasso penalty too
    iter.outer = 100L, iter.inner = 500L,
-    tol = sqrt(.Machine$double.eps)
+    tol = sqrt(.Machine$double.eps),
    use.C = FALSE
 ) {
    method <- match.arg(method)
@ -78,29 +90,36 @@ POI <- function(A, B, d = 1L, sparsity = 0.5,
    } else {
        Delta <- eigen(A, symmetric = TRUE)$vectors[, 1:d, drop = FALSE]
    }
    Q <- Delta
    Z <- matrix(0, nrow(Q), ncol(Q))
-    # Outer loop (iteration)
+    # In case of fast POI, only one iteration
-    for (i in seq_len(iter.outer)) {
+    if (startsWith(method, 'Fast')) {
-        Q.last <- Q # for break condition
+        # Step 1: Optimize (inner loop, a.k.a. coordinate wise penalization)
        if (use.C) {
            Z <- .Call('FastPOI_C_sub', B, Delta, lambda,
                    as.integer(iter.inner), tol,
                    PACKAGE = 'tensorPredictors')
        } else {
            # Initial value
            Z <- Delta
-        # Step 1: Solve B Z_i = A Q_{i-1} for Z_i
+            # Note, the R implementation does NOT use a cyclic update instead
-        Delta <- crossprod(A, Q)
+            # performs coordinate penalization in parallel
-        # Inner Loop
+            for (j in seq_len(iter.inner)) {
-        for (j in seq_len(iter.inner)) {
+                Z.last <- Z # for break condition
            Z.last <- Z # for break condition
-            traces <- Delta - B %*% Z + diag(B) * Z
+                # TODO: it seems (in general) that the cyclic update is actually needed!
-            Z <- traces * (pmax(1 - lambda / sqrt(rowSums(traces^2)), 0) / diag(B))
+                traces <- Delta - B %*% Z + diag(B) * Z
                Z <- traces * (pmax(1 - lambda / sqrt(rowSums(traces^2)), 0) / diag(B))
-            # Inner break condition
+                # Inner break condition (second condition safeguards against devergence)
-            if (norm(Z.last - Z, 'F') < tol) {
+                diff <- norm(Z.last - Z, 'F')
-                break
+                if (diff < tol || 1 < tol * diff) {
                    break
                }
            }
        }
-        # Step 2: QR decomposition of Z_i = Q_i R_i.
+        # Step 2: QR decomposition (same as below)
        if (d == 1L) {
            Z.norm <- norm(Z, 'F')
            if (Z.norm < tol) {
@ -120,94 +139,92 @@ POI <- function(A, B, d = 1L, sparsity = 0.5,
                Q <- qr.Q(qr(Z))
            }
        }
    } else { # POI-C
        Q <- Delta
        Z <- matrix(0, nrow(Q), ncol(Q))
-        # In case of fast POI, only one iteration
+        # Outer loop (iteration)
-        if (startsWith(method, 'Fast')) {
+        for (i in seq_len(iter.outer)) {
-            break
+            Q.last <- Q # for break condition
-        }
+
-        if (norm(tcrossprod(Q, Q) - tcrossprod(Q.last, Q.last), 'F') < tol) {
+            # Step 1: Solve B Z_i = A Q_{i-1} for Z_i
-            break
+            Delta <- crossprod(A, Q)
            # Inner Loop
            if (use.C) {
                Z <- .Call('FastPOI_C_sub', B, Delta, lambda,
                        as.integer(iter.inner), tol,
                        PACKAGE = 'tensorPredictors')
            } else {
                # Note, the R implementation does NOT use a cyclic update instead
                # performs coordinate penalization in parallel
                for (j in seq_len(iter.inner)) {
                    Z.last <- Z # for break condition
                    # TODO: it seems (in general) that the cyclic update is actually needed!
                    traces <- Delta - B %*% Z + diag(B) * Z
                    Z <- traces * (pmax(1 - lambda / sqrt(rowSums(traces^2)), 0) / diag(B))
                    # Inner break condition (second condition safeguards against devergence)
                    diff <- norm(Z.last - Z, 'F')
                    if (diff < tol || 1 < tol * diff) {
                        break
                    }
                }
            }
            # Step 2: QR decomposition of Z_i = Q_i R_i.
            if (d == 1L) {
                Z.norm <- norm(Z, 'F')
                if (Z.norm < tol) {
                    Q <- matrix(0, p, d)
                } else {
                    Q <- Z / Z.norm
                }
            } else {
                # Detect zero columns.
                zero.col <- colSums(abs(Z)) < tol
                if (all(zero.col)) {
                    Q <- matrix(0, nrow(Z), ncol(Z))
                } else if (any(zero.col)) {
                    Q <- matrix(0, nrow(Z), ncol(Z))
                    Q[, !zero.col] <- qr.Q(qr(Z[, !zero.col]))
                } else {
                    Q <- qr.Q(qr(Z))
                }
            }
            # Outer break condition || Q Q' - Q.last Q.last' || < tol
            # The used form is equivalent but much faster for d << p
            # The following holds in general for two matrices A, B of dim p x d
            #   || A A' - B B' ||^2 = || A' A ||^2 - 2 || A' B ||^2 + || B' B ||^2
            # for the Frobenius norm ||.||. The computational cost of the left side
            # is O(p^2 d) while the right side has O(p d^2).
            tr <- sum(crossprod(Q)^2) -
            2 * sum(crossprod(Q, Q.last)^2) +
                sum(crossprod(Q.last)^2)
            if (sqrt(max(0, tr)) < tol) {
                break
            }
        }
    }
-    # TODO: Finish with transformation to original solution U of
+    # Reconstruct solution of the original GEP by solving
-    #   A U = B U Lambda.
+    #   (Q' A Q) T = (Q' B Q) T D
    # for T and D which gives the solution U = Q T and Lambda = D.
    if (1 < d) {
        eig <- solve.gep(crossprod(Q, A) %*% Q, crossprod(Q, B) %*% Q)
        vectors <- Q %*% eig$vectors
        values <- eig$values
    } else {
        vectors <- Q
        values <- c((crossprod(Q, A) %*% Q) / (crossprod(Q, B) %*% Q))
    }
    structure(list(
-        Z = Z, Q = Q,
+        vectors = vectors,
        values = values,
        lambda = lambda,
        call = match.call()
    ), class = c("tensor_predictor", "POI"))
 }
 # POI.bak <- function(A, B, d,
 #                 lambda = 0.75 * sqrt(max(rowSums(Delta^2))),
 #                 update.tol = 1e-3,
 #                 tol = 100 * .Machine$double.eps,
 #                 maxit = 400L,
 #                 # maxit.outer = maxit,
 #                 maxit.inner = maxit,
 #                 use.C = FALSE,
 #                 method = 'FastPOI-C') {
 #     # TODO:
 #     stopifnot(method == 'FastPOI-C')
 #     if (requireNamespace("RSpectra", quietly = TRUE)) {
 #         Delta <- RSpectra::eigs_sym(A, d)$vectors
 #     } else {
 #         Delta <- eigen(A, symmetric = TRUE)$vectors[, 1:d, drop = FALSE]
 #     }
 #     # Set initial value.
 #     Z <- Delta
 #     # Step 1: Optimization.
 #     # The "inner" optimization loop, aka repeated coordinate optimization.
 #     if (use.C) {
 #         Z <- .Call('FastPOI_C_sub', A, B, Delta, lambda, as.integer(maxit.inner),
 #                    PACKAGE = 'tensorPredictors')
 #     } else {
 #         p <- nrow(Z)
 #         for (iter.inner in 1:maxit.inner) {
 #             Zold <- Z
 #             for (g in 1:p) {
 #                 a <- Delta[g, ] - B[g, ] %*% Z + B[g, g] * Z[g, ]
 #                 a_norm <- sqrt(sum(a^2))
 #                 if (a_norm > lambda) {
 #                     Z[g, ] <- a * ((1 - lambda / a_norm) / B[g, g])
 #                 } else {
 #                     Z[g, ] <- 0
 #                 }
 #             }
 #             if (norm(Z - Zold, 'F') < update.tol) {
 #                 break
 #             }
 #         }
 #     }
 #     # Step 2: QR decomposition.
 #     if (d == 1L) {
 #         Z_norm <- sqrt(sum(Z^2))
 #         if (Z_norm < tol) {
 #             Q <- matrix(0, p, d)
 #         } else {
 #             Q <- Z / Z_norm
 #         }
 #     } else {
 #         # Detect zero columns.
 #         zeroColumn <- colSums(abs(Z)) < tol
 #         if (all(zeroColumn)) {
 #             Q <- matrix(0, p, d)
 #         } else if (any(zeroColumn)) {
 #             Q <- matrix(0, p, d)
 #             Q[, !zeroColumn] <- qr.Q(qr(Z))
 #         } else {
 #             Q <- qr.Q(qr(Z))
 #         }
 #     }
 #     list(Z = Z, Q = Q, iter.inner = if (use.C) NA else iter.inner,
 #          lambda = lambda)
 # }
--- a/tensorPredictors/R/dist_projection.R
+++ b/tensorPredictors/R/dist_projection.R
@ -0,0 +1,27 @@
 #' Porjection Distance of two matrices
 #'
 #' Defined as sine of the maximum principal angle between the column spaces
 #' of the matrices
 #'      max{ sin theta_i, i = 1, ..., min(d1, d2) }
 #' 
 #' @param A,B matrices of size \eqn{p\times d_1} and \eqn{p\times d_2}.
 #'
 #' @export
 dist.projection <- function(A, B, is.ortho = FALSE,
    tol = sqrt(.Machine$double.eps)
 ) {
    if (!is.ortho) {
        qrA <- qr(A, tol)
        A <- qr.Q(qrA)[, seq_len(qrA$rank), drop = FALSE]
        qrB <- qr(B, tol)
        B <- qr.Q(qrB)[, seq_len(qrB$rank), drop = FALSE]
    }
    if (ncol(A) == 0L && ncol(B) == 0L) {
        0
    } else if (ncol(A) == 0L || ncol(B) == 0L) {
        1
    } else {
        sin(acos(min(c(La.svd(crossprod(A, B), 0, 0)$d, 1))))
    }
 }
--- a/tensorPredictors/R/dist_subspace.R
+++ b/tensorPredictors/R/dist_subspace.R
@ -14,18 +14,30 @@
 #'  subspaces of different dimensions" <arXiv:1407.0900>
 #' 
 #' @export
-dist.subspace <- function (A, B, is.ortho = FALSE, normalize = FALSE) {
+dist.subspace <- function (A, B, is.ortho = FALSE, normalize = FALSE,
    tol = sqrt(.Machine$double.eps)
 ) {
    if (!is.matrix(A)) A <- as.matrix(A)
    if (!is.matrix(B)) B <- as.matrix(B)
-    if (is.ortho) {
+    if (!is.ortho) {
-        PA <- tcrossprod(A, A)
+        qrA <- qr(A, tol)
-        PB <- tcrossprod(B, B)
+        if (qrA$rank < ncol(A)) {
-    } else {
+            A <- qr.Q(qrA)[, abs(diag(qr.R(qrA))) > tol, drop = FALSE]
-        PA <- A %*% solve(t(A) %*% A, t(A))
+        } else {
-        PB <- B %*% solve(t(B) %*% B, t(B))
+            A <- qr.Q(qrA)
        }
        qrB <- qr(B, tol)
        if (qrB$rank < ncol(B)) {
            B <- qr.Q(qrB)[, abs(diag(qr.R(qrB))) > tol, drop = FALSE]
        } else {
            B <- qr.Q(qrB)
        }
    }
    PA <- tcrossprod(A, A)
    PB <- tcrossprod(B, B)
    if (normalize) {
        rankSum <- ncol(A) + ncol(B)
        c <- 1 / sqrt(min(rankSum, 2 * nrow(A) - rankSum))
--- a/tensorPredictors/src/poi.c
+++ b/tensorPredictors/src/poi.c
@ -1,10 +1,18 @@
 #include <math.h>
 /**
 * 
 * NOTE: CURRENTLY NOT IN USE!
 * 
 */
 #include <R.h>
 #include <Rinternals.h>
 /* invoced by .Call */
-extern SEXP FastPOI_C_sub(SEXP in_A, SEXP in_B, SEXP in_Delta, SEXP in_lambda, SEXP in_maxit) {
+extern SEXP FastPOI_C_sub(SEXP in_B, SEXP in_Delta,
    SEXP in_lambda, SEXP in_maxit, SEXP in_tol
 ) {
    int i, j, k, g;
    int p = nrows(in_Delta);
@ -16,10 +24,10 @@ extern SEXP FastPOI_C_sub(SEXP in_A, SEXP in_B, SEXP in_Delta, SEXP in_lambda, S
    double* Zold = (double*)R_alloc(p * d, sizeof(double));
    double* Delta = REAL(in_Delta);
    double* a = (double*)R_alloc(d, sizeof(double));
    double* A = REAL(in_A);
    double* B = REAL(in_B);
    double a_norm;
    double lambda = asReal(in_lambda);
    double tol = asReal(in_tol);
    double scale;
    double res;
@ -55,7 +63,6 @@ extern SEXP FastPOI_C_sub(SEXP in_A, SEXP in_B, SEXP in_Delta, SEXP in_lambda, S
            for (j = 0; j < d; ++j) {
                Z[j * p + g] = scale * a[j];
            }
        }
        // Copy Z to Zold and check break condition.
@ -64,7 +71,7 @@ extern SEXP FastPOI_C_sub(SEXP in_A, SEXP in_B, SEXP in_Delta, SEXP in_lambda, S
            res += (Z[j] - Zold[j]) * (Z[j] - Zold[j]);
            Zold[j] = Z[j];
        }
-        if (res < 1e-6) {
+        if (res < tol) {
            break;
        }