From b0151dfafb6a226f342ac33c517d9b191ed31b0c Mon Sep 17 00:00:00 2001
From: daniel <daniel@kapla.at>
Date: Fri, 12 Nov 2021 18:22:45 +0100
Subject: [PATCH] wip: POI

---
 tensorPredictors/R/CISE.R |  46 ++++---
 tensorPredictors/R/GEP.R  |  61 ++++++++++
 tensorPredictors/R/POI.R  | 245 +++++++++++++++++++++++++++++---------
 3 files changed, 282 insertions(+), 70 deletions(-)
 create mode 100644 tensorPredictors/R/GEP.R

diff --git a/tensorPredictors/R/CISE.R b/tensorPredictors/R/CISE.R
index 5648617..5795615 100644
--- a/tensorPredictors/R/CISE.R
+++ b/tensorPredictors/R/CISE.R
@@ -87,8 +87,8 @@
 #' @suggest RSpectra
 #'
 #' @export
-CISE <- function(M, N, d = 1L, max.iter = 100L, Theta = NULL,
-    tol.norm = 1e-6, tol.break = 1e-3, r = 0.5
+CISE <- function(M, N, d = 1L, method = "PFC", max.iter = 100L, Theta = NULL,
+    tol.norm = 1e-6, tol.break = 1e-6, r = 0.5
 ) {
 
     isrN <- matpow(N, -0.5)     # N^-1/2 ... Inverse Square-Root of N
@@ -111,22 +111,28 @@ CISE <- function(M, N, d = 1L, max.iter = 100L, Theta = NULL,
     }
 
     norms <- sqrt(rowSums(V.init^2)) # row norms of V
-    theta.vec <- 0.5 * ifelse(norms < tol.norm, 0, norms^(-r))
+    theta.scale <- 0.5 * ifelse(norms < tol.norm, 0, norms^(-r))
 
     # For each penalty candidate
     fits <- lapply(Theta, function(theta) {
         # Step 2: Iteratively optimize constraint GEP
         V <- V.init
-        dropped <- norms < tol.norm
+        dropped <- rep(FALSE, nrow(M))  # Keep track of dropped variables
         for (iter in seq_len(max.iter)) {
-            # Approx. penalty term derivative at current position
+            # Compute current row norms
             norms <- sqrt(rowSums(V^2)) # row norms of V
-            dropped <- dropped | (norms < tol.norm)
-            h <- ifelse(dropped, 0, theta * (theta.vec / norms))
+            # Check if variables are dropped. If so, update dropped and
+            # recompute the inverse square root of N
+            if (any(norms < tol.norm)) {
+                dropped[!dropped] <- norms < tol.norm
+                norms <- norms[!(norms < tol.norm)]
+                isrN <- matpow(N[!dropped, !dropped], -0.5)
+            }
+            # Approx. penalty term derivative at current position
+            h <- theta * (theta.scale[!dropped] / norms)
 
-            # Updated G at current position (scaling by 1/2 done in `theta.vec`)
-            A <- G - (isrN %*% (h * isrN))
-            A[dropped, dropped] <- 0
+            # Updated G at current position (scaling by 1/2 done in `theta.scale`)
+            A <- G[!dropped, !dropped] - (isrN %*% (h * isrN))
             # Solve next iteration GEP
             Gamma <- if (requireNamespace("RSpectra", quietly = TRUE)) {
                 RSpectra::eigs_sym(A, d)$vectors
@@ -135,14 +141,22 @@ CISE <- function(M, N, d = 1L, max.iter = 100L, Theta = NULL,
             }
             V.last <- V
             V <- isrN %*% Gamma
-            V[dropped, ] <- 0
 
-            # break condition
-            if (dist.subspace(V.last, V, normalize = TRUE) < tol.break) {
+            # Check if there are enough variables left
+            if (nrow(V) < d + 1) {
+                break
+            }
+            # Break dondition (only when nothing dropped)
+            if (nrow(V.last) == nrow(V)
+            && dist.subspace(V.last, V, normalize = TRUE) < tol.break) {
                 break
             }
         }
 
+        # Recreate dropped variables and fill parameters with 0.
+        V.full <- matrix(0, nrow(M), d)
+        V.full[!dropped, ] <- V
+
         # df <- (sum(!dropped) - d) * d
         # BIC <- -sum(V * (M %*% V)) + log(n) * df / n
         # cat("theta:",  sprintf('%7.3f', range(theta)),
@@ -153,10 +167,12 @@ CISE <- function(M, N, d = 1L, max.iter = 100L, Theta = NULL,
         #     # "- ", paste(sprintf('%6.2f', norms), collapse = ", "),
         #     '\n')
 
-        structure(V,
+        structure(qr.Q(qr(V.full)),
             theta = theta, iter = iter, BIC = BIC, df = df,
             dist = dist.subspace(V.last, V, normalize = TRUE))
     })
 
-    structure(fits, class = c("tensorPredictors", "CISE"))
+    structure(fits,
+        call = match.call(),
+        class = c("tensorPredictors", "CISE"))
 }
diff --git a/tensorPredictors/R/GEP.R b/tensorPredictors/R/GEP.R
new file mode 100644
index 0000000..a0639c5
--- /dev/null
+++ b/tensorPredictors/R/GEP.R
@@ -0,0 +1,61 @@
+#' Compute eigenvalue problem equiv. to method.
+#'
+#' Computes the matrices \eqn{A, B} of a generalized eigenvalue problem
+#' \deqn{A X = B Lambda} with \eqn{X} the matrix of eigenvectors and
+#' \eqn{Lambda} diagonal matrix of eigenvalues.
+#'
+#' @param X predictor matrix.
+#' @param y responses (see details).
+#' @param method One of "pca", "pfc", ... TODO: finish, add more, ...
+#'
+#' @returns list of matrices \code{lhs} for \eqn{A} and \code{rhs} for \eqn{B}.
+#'
+#' @seealso section 2.1 of "Coordinate-Independent Sparse Sufficient Dimension
+#' Reduction and Variable Selection" By Xin Chen, Changliang Zou and
+#' R. Dennis Cook.
+#'
+GEP <- function(X, y, method = c('pfc', 'pca', 'sir', 'save'), ...,
+    nr.slices = 10, ensamble = list(abs, identity, function(x) x^2)
+) {
+    method <- match.arg(method)
+
+    if (method == 'pca') {
+        lhs <- cov(X)           # covariance
+        rhs <- diag(ncol(X))    # identity
+    } else if (method == 'pfc') {
+        X <- scale(X, scale = FALSE, center = TRUE)
+
+        Fy <- sapply(ensamble, do.call, list(y))
+        Fy <- scale(Fy, scale = FALSE, center = TRUE)
+
+        # Compute Sigma_fit (the sample covariance matrix of the fitted vectors).
+        P_Fy <- Fy %*% solve(crossprod(Fy), t(Fy))
+        lhs <- crossprod(X, P_Fy %*% X) / nrow(X)
+        # Estimate Sigma (the MLE sample covariance matrix).
+        rhs <- crossprod(X) / nrow(X)
+    } else if (method == 'sir') {
+        if (NCOL(y) != 1) {
+            stop('For SIR only univariate response suported.')
+        }
+        # Build slices (if not categorical)
+        if (is.factor(y)) {
+            slice.index <- y
+        } else {
+            # TODO: make this proper, just for a bit of playing!!!
+            slice.size <- round(length(y) / nr.slices)
+            slice.index <- factor((rank(y) - 1) %/% slice.size)
+        }
+
+        # Covariance of slice means, Cov(E[X - E X | y])
+        lhs <- cov(t(sapply(levels(slice.index), function(i) {
+            colMeans(X[slice.index == i, , drop = FALSE])
+        })))
+        # Sample covariance
+        rhs <- cov(X)
+    } else {
+        stop('Not implemented!')
+    }
+
+    # Return left- and right-hand-side of GEP equation system.
+    list(lhs = lhs, rhs = rhs)
+}
diff --git a/tensorPredictors/R/POI.R b/tensorPredictors/R/POI.R
index 085d55a..8c73e79 100644
--- a/tensorPredictors/R/POI.R
+++ b/tensorPredictors/R/POI.R
@@ -1,3 +1,40 @@
+solve.gep <- function(A, B, d = nrow(A)) {
+    isrB <- matpow(B, -0.5)
+
+    if (requireNamespace("RSpectra", quietly = TRUE) && d < nrow(A)) {
+        eig <- RSpectra::eigs_sym(isrB %*% A %*% isrB, d)
+    } else {
+        eig <- eigen(isrB %*% A %*% isrB, symmetric = TRUE)
+    }
+
+    list(vectors = isrB %*% eig$vectors, values = eig$values)
+}
+
+POI.lambda.max <- function(A, d = 1L, method = c('POI-C', 'POI-L', 'FastPOI-C', 'FastPOI-L')) {
+    method <- match.arg(method)
+
+    if (method %in% c('POI-C', 'POI-L')) {
+        A2 <- apply(apply(A^2, 2, sort, decreasing = TRUE), 2, cumsum)
+        lambda.max <- sqrt(apply(A2[1:d, , drop = FALSE], 1, max))
+        if (method == 'POI-C') {
+            lambda.max[d]
+        } else {
+            lambda.max[1]
+        }
+    } else {
+        if (requireNamespace("RSpectra", quietly = TRUE)) {
+            vec <- RSpectra::eigs_sym(A, d)$vectors
+        } else {
+            vec <- eigen(A, symmetric = TRUE)$vectors
+        }
+        if (method == 'FastPOI-C') {
+            sqrt(max(rowSums(vec^2)))
+        } else { # 'FastPOI-L'
+            max(abs(vec))
+        }
+    }
+}
+
 #' Penalysed Orthogonal Iteration.
 #'
 #' @param lambda Default: 0.75 * lambda_max for FastPOI-C method.
@@ -6,73 +43,171 @@
 #'  dyn.load('../tensor_predictors/poi.so')
 #'
 #' @export
-POI <- function(A, B, d,
-                lambda = 0.75 * sqrt(max(rowSums(Delta^2))),
-                update.tol = 1e-3,
-                tol = 100 * .Machine$double.eps,
-                maxit = 400L,
-                # maxit.outer = maxit,
-                maxit.inner = maxit,
-                use.C = FALSE,
-                method = 'FastPOI-C') {
+POI <- function(A, B, d = 1L, sparsity = 0.5,
+    method = c('POI-C', 'FastPOI-C'), # TODO: Maybe implement the the lasso loss too
+    iter.outer = 100L, iter.inner = 500L,
+    tol = sqrt(.Machine$double.eps)
+) {
+    method <- match.arg(method)
 
-    # TODO:
-    stopifnot(method == 'FastPOI-C')
+    # Compute penalty parameter lambda
+    lambda <- sparsity * POI.lambda.max(A, d, method)
 
-    if (requireNamespace("RSpectra", quietly = TRUE)) {
+    # Ensure RHS B to be positive definite
+    if (missing(B)) {
+        B <- diag(nrow(A))
+    } else {
+        rankB <- qr(B, tol)$rank
+        if (rankB < nrow(B)) {
+            diag(B) <- diag(B) + log(nrow(B)) / rankB
+        }
+    }
+
+    # In case of zero penalty compute ordinary GEP solution
+    if (lambda == 0) {
+        eig <- solve.eig(A, B, d)
+        return(structure(list(
+            U = eig$vectors, d = eig$values,
+            lambda = 0, call = match.call()
+        ), class = c("tensor_predictor", "POI")))
+    }
+
+    # Set initial values
+    if (requireNamespace("RSpectra", quietly = TRUE) && nrow(A) < d) {
         Delta <- RSpectra::eigs_sym(A, d)$vectors
     } else {
         Delta <- eigen(A, symmetric = TRUE)$vectors[, 1:d, drop = FALSE]
     }
+    Q <- Delta
+    Z <- matrix(0, nrow(Q), ncol(Q))
 
-    # Set initial value.
-    Z <- Delta
+    # Outer loop (iteration)
+    for (i in seq_len(iter.outer)) {
+        Q.last <- Q # for break condition
 
-    # Step 1: Optimization.
-    # The "inner" optimization loop, aka repeated coordinate optimization.
-    if (use.C) {
-        Z <- .Call('FastPOI_C_sub', A, B, Delta, lambda, as.integer(maxit.inner),
-                   PACKAGE = 'tensorPredictors')
-    } else {
-        p <- nrow(Z)
-        for (iter.inner in 1:maxit.inner) {
-            Zold <- Z
-            for (g in 1:p) {
-                a <- Delta[g, ] - B[g, ] %*% Z + B[g, g] * Z[g, ]
-                a_norm <- sqrt(sum(a^2))
-                if (a_norm > lambda) {
-                    Z[g, ] <- a * ((1 - lambda / a_norm) / B[g, g])
-                } else {
-                    Z[g, ] <- 0
-                }
+        # Step 1: Solve B Z_i = A Q_{i-1} for Z_i
+        Delta <- crossprod(A, Q)
+        # Inner Loop
+        for (j in seq_len(iter.inner)) {
+            Z.last <- Z # for break condition
+
+            traces <- Delta - B %*% Z + diag(B) * Z
+            Z <- traces * (pmax(1 - lambda / sqrt(rowSums(traces^2)), 0) / diag(B))
+
+            # Inner break condition
+            if (norm(Z.last - Z, 'F') < tol) {
+                break
             }
-            if (norm(Z - Zold, 'F') < update.tol) {
-                break;
+        }
+
+        # Step 2: QR decomposition of Z_i = Q_i R_i.
+        if (d == 1L) {
+            Z.norm <- norm(Z, 'F')
+            if (Z.norm < tol) {
+                Q <- matrix(0, p, d)
+            } else {
+                Q <- Z / Z.norm
             }
+        } else {
+            # Detect zero columns.
+            zero.col <- colSums(abs(Z)) < tol
+            if (all(zero.col)) {
+                Q <- matrix(0, p, d)
+            } else if (any(zero.col)) {
+                Q <- matrix(0, p, d)
+                Q[, !zero.col] <- qr.Q(qr(Z[, !zero.col]))
+            } else {
+                Q <- qr.Q(qr(Z))
+            }
+        }
+
+        # In case of fast POI, only one iteration
+        if (startsWith(method, 'Fast')) {
+            break
+        }
+        if (norm(tcrossprod(Q, Q) - tcrossprod(Q.last, Q.last), 'F') < tol) {
+            break
         }
     }
 
-    # Step 2: QR decomposition.
-    if (d == 1L) {
-        Z_norm <- sqrt(sum(Z^2))
-        if (Z_norm < tol) {
-            Q <- matrix(0, p, d)
-        } else {
-            Q <- Z / Z_norm
-        }
-    } else {
-        # Detect zero columns.
-        zeroColumn <- colSums(abs(Z)) < tol
-        if (all(zeroColumn)) {
-            Q <- matrix(0, p, d)
-        } else if (any(zeroColumn)) {
-            Q <- matrix(0, p, d)
-            Q[, !zeroColumn] <- qr.Q(qr(Z))
-        } else {
-            Q <- qr.Q(qr(Z))
-        }
-    }
+    # TODO: Finish with transformation to original solution U of
+    #   A U = B U Lambda.
 
-    return(list(Z = Z, Q = Q, iter.inner = if (use.C) NA else iter.inner,
-                lambda = lambda))
+    structure(list(
+        Z = Z, Q = Q,
+        lambda = lambda,
+        call = match.call()
+    ), class = c("tensor_predictor", "POI"))
 }
+
+
+# POI.bak <- function(A, B, d,
+#                 lambda = 0.75 * sqrt(max(rowSums(Delta^2))),
+#                 update.tol = 1e-3,
+#                 tol = 100 * .Machine$double.eps,
+#                 maxit = 400L,
+#                 # maxit.outer = maxit,
+#                 maxit.inner = maxit,
+#                 use.C = FALSE,
+#                 method = 'FastPOI-C') {
+
+#     # TODO:
+#     stopifnot(method == 'FastPOI-C')
+
+#     if (requireNamespace("RSpectra", quietly = TRUE)) {
+#         Delta <- RSpectra::eigs_sym(A, d)$vectors
+#     } else {
+#         Delta <- eigen(A, symmetric = TRUE)$vectors[, 1:d, drop = FALSE]
+#     }
+
+#     # Set initial value.
+#     Z <- Delta
+
+#     # Step 1: Optimization.
+#     # The "inner" optimization loop, aka repeated coordinate optimization.
+#     if (use.C) {
+#         Z <- .Call('FastPOI_C_sub', A, B, Delta, lambda, as.integer(maxit.inner),
+#                    PACKAGE = 'tensorPredictors')
+#     } else {
+#         p <- nrow(Z)
+#         for (iter.inner in 1:maxit.inner) {
+#             Zold <- Z
+#             for (g in 1:p) {
+#                 a <- Delta[g, ] - B[g, ] %*% Z + B[g, g] * Z[g, ]
+#                 a_norm <- sqrt(sum(a^2))
+#                 if (a_norm > lambda) {
+#                     Z[g, ] <- a * ((1 - lambda / a_norm) / B[g, g])
+#                 } else {
+#                     Z[g, ] <- 0
+#                 }
+#             }
+#             if (norm(Z - Zold, 'F') < update.tol) {
+#                 break
+#             }
+#         }
+#     }
+
+#     # Step 2: QR decomposition.
+#     if (d == 1L) {
+#         Z_norm <- sqrt(sum(Z^2))
+#         if (Z_norm < tol) {
+#             Q <- matrix(0, p, d)
+#         } else {
+#             Q <- Z / Z_norm
+#         }
+#     } else {
+#         # Detect zero columns.
+#         zeroColumn <- colSums(abs(Z)) < tol
+#         if (all(zeroColumn)) {
+#             Q <- matrix(0, p, d)
+#         } else if (any(zeroColumn)) {
+#             Q <- matrix(0, p, d)
+#             Q[, !zeroColumn] <- qr.Q(qr(Z))
+#         } else {
+#             Q <- qr.Q(qr(Z))
+#         }
+#     }
+
+#     list(Z = Z, Q = Q, iter.inner = if (use.C) NA else iter.inner,
+#          lambda = lambda)
+# }