From b0151dfafb6a226f342ac33c517d9b191ed31b0c Mon Sep 17 00:00:00 2001 From: daniel Date: Fri, 12 Nov 2021 18:22:45 +0100 Subject: [PATCH] wip: POI --- tensorPredictors/R/CISE.R | 46 ++++--- tensorPredictors/R/GEP.R | 61 ++++++++++ tensorPredictors/R/POI.R | 245 +++++++++++++++++++++++++++++--------- 3 files changed, 282 insertions(+), 70 deletions(-) create mode 100644 tensorPredictors/R/GEP.R diff --git a/tensorPredictors/R/CISE.R b/tensorPredictors/R/CISE.R index 5648617..5795615 100644 --- a/tensorPredictors/R/CISE.R +++ b/tensorPredictors/R/CISE.R @@ -87,8 +87,8 @@ #' @suggest RSpectra #' #' @export -CISE <- function(M, N, d = 1L, max.iter = 100L, Theta = NULL, - tol.norm = 1e-6, tol.break = 1e-3, r = 0.5 +CISE <- function(M, N, d = 1L, method = "PFC", max.iter = 100L, Theta = NULL, + tol.norm = 1e-6, tol.break = 1e-6, r = 0.5 ) { isrN <- matpow(N, -0.5) # N^-1/2 ... Inverse Square-Root of N @@ -111,22 +111,28 @@ CISE <- function(M, N, d = 1L, max.iter = 100L, Theta = NULL, } norms <- sqrt(rowSums(V.init^2)) # row norms of V - theta.vec <- 0.5 * ifelse(norms < tol.norm, 0, norms^(-r)) + theta.scale <- 0.5 * ifelse(norms < tol.norm, 0, norms^(-r)) # For each penalty candidate fits <- lapply(Theta, function(theta) { # Step 2: Iteratively optimize constraint GEP V <- V.init - dropped <- norms < tol.norm + dropped <- rep(FALSE, nrow(M)) # Keep track of dropped variables for (iter in seq_len(max.iter)) { - # Approx. penalty term derivative at current position + # Compute current row norms norms <- sqrt(rowSums(V^2)) # row norms of V - dropped <- dropped | (norms < tol.norm) - h <- ifelse(dropped, 0, theta * (theta.vec / norms)) + # Check if variables are dropped. If so, update dropped and + # recompute the inverse square root of N + if (any(norms < tol.norm)) { + dropped[!dropped] <- norms < tol.norm + norms <- norms[!(norms < tol.norm)] + isrN <- matpow(N[!dropped, !dropped], -0.5) + } + # Approx. penalty term derivative at current position + h <- theta * (theta.scale[!dropped] / norms) - # Updated G at current position (scaling by 1/2 done in `theta.vec`) - A <- G - (isrN %*% (h * isrN)) - A[dropped, dropped] <- 0 + # Updated G at current position (scaling by 1/2 done in `theta.scale`) + A <- G[!dropped, !dropped] - (isrN %*% (h * isrN)) # Solve next iteration GEP Gamma <- if (requireNamespace("RSpectra", quietly = TRUE)) { RSpectra::eigs_sym(A, d)$vectors @@ -135,14 +141,22 @@ CISE <- function(M, N, d = 1L, max.iter = 100L, Theta = NULL, } V.last <- V V <- isrN %*% Gamma - V[dropped, ] <- 0 - # break condition - if (dist.subspace(V.last, V, normalize = TRUE) < tol.break) { + # Check if there are enough variables left + if (nrow(V) < d + 1) { + break + } + # Break dondition (only when nothing dropped) + if (nrow(V.last) == nrow(V) + && dist.subspace(V.last, V, normalize = TRUE) < tol.break) { break } } + # Recreate dropped variables and fill parameters with 0. + V.full <- matrix(0, nrow(M), d) + V.full[!dropped, ] <- V + # df <- (sum(!dropped) - d) * d # BIC <- -sum(V * (M %*% V)) + log(n) * df / n # cat("theta:", sprintf('%7.3f', range(theta)), @@ -153,10 +167,12 @@ CISE <- function(M, N, d = 1L, max.iter = 100L, Theta = NULL, # # "- ", paste(sprintf('%6.2f', norms), collapse = ", "), # '\n') - structure(V, + structure(qr.Q(qr(V.full)), theta = theta, iter = iter, BIC = BIC, df = df, dist = dist.subspace(V.last, V, normalize = TRUE)) }) - structure(fits, class = c("tensorPredictors", "CISE")) + structure(fits, + call = match.call(), + class = c("tensorPredictors", "CISE")) } diff --git a/tensorPredictors/R/GEP.R b/tensorPredictors/R/GEP.R new file mode 100644 index 0000000..a0639c5 --- /dev/null +++ b/tensorPredictors/R/GEP.R @@ -0,0 +1,61 @@ +#' Compute eigenvalue problem equiv. to method. +#' +#' Computes the matrices \eqn{A, B} of a generalized eigenvalue problem +#' \deqn{A X = B Lambda} with \eqn{X} the matrix of eigenvectors and +#' \eqn{Lambda} diagonal matrix of eigenvalues. +#' +#' @param X predictor matrix. +#' @param y responses (see details). +#' @param method One of "pca", "pfc", ... TODO: finish, add more, ... +#' +#' @returns list of matrices \code{lhs} for \eqn{A} and \code{rhs} for \eqn{B}. +#' +#' @seealso section 2.1 of "Coordinate-Independent Sparse Sufficient Dimension +#' Reduction and Variable Selection" By Xin Chen, Changliang Zou and +#' R. Dennis Cook. +#' +GEP <- function(X, y, method = c('pfc', 'pca', 'sir', 'save'), ..., + nr.slices = 10, ensamble = list(abs, identity, function(x) x^2) +) { + method <- match.arg(method) + + if (method == 'pca') { + lhs <- cov(X) # covariance + rhs <- diag(ncol(X)) # identity + } else if (method == 'pfc') { + X <- scale(X, scale = FALSE, center = TRUE) + + Fy <- sapply(ensamble, do.call, list(y)) + Fy <- scale(Fy, scale = FALSE, center = TRUE) + + # Compute Sigma_fit (the sample covariance matrix of the fitted vectors). + P_Fy <- Fy %*% solve(crossprod(Fy), t(Fy)) + lhs <- crossprod(X, P_Fy %*% X) / nrow(X) + # Estimate Sigma (the MLE sample covariance matrix). + rhs <- crossprod(X) / nrow(X) + } else if (method == 'sir') { + if (NCOL(y) != 1) { + stop('For SIR only univariate response suported.') + } + # Build slices (if not categorical) + if (is.factor(y)) { + slice.index <- y + } else { + # TODO: make this proper, just for a bit of playing!!! + slice.size <- round(length(y) / nr.slices) + slice.index <- factor((rank(y) - 1) %/% slice.size) + } + + # Covariance of slice means, Cov(E[X - E X | y]) + lhs <- cov(t(sapply(levels(slice.index), function(i) { + colMeans(X[slice.index == i, , drop = FALSE]) + }))) + # Sample covariance + rhs <- cov(X) + } else { + stop('Not implemented!') + } + + # Return left- and right-hand-side of GEP equation system. + list(lhs = lhs, rhs = rhs) +} diff --git a/tensorPredictors/R/POI.R b/tensorPredictors/R/POI.R index 085d55a..8c73e79 100644 --- a/tensorPredictors/R/POI.R +++ b/tensorPredictors/R/POI.R @@ -1,3 +1,40 @@ +solve.gep <- function(A, B, d = nrow(A)) { + isrB <- matpow(B, -0.5) + + if (requireNamespace("RSpectra", quietly = TRUE) && d < nrow(A)) { + eig <- RSpectra::eigs_sym(isrB %*% A %*% isrB, d) + } else { + eig <- eigen(isrB %*% A %*% isrB, symmetric = TRUE) + } + + list(vectors = isrB %*% eig$vectors, values = eig$values) +} + +POI.lambda.max <- function(A, d = 1L, method = c('POI-C', 'POI-L', 'FastPOI-C', 'FastPOI-L')) { + method <- match.arg(method) + + if (method %in% c('POI-C', 'POI-L')) { + A2 <- apply(apply(A^2, 2, sort, decreasing = TRUE), 2, cumsum) + lambda.max <- sqrt(apply(A2[1:d, , drop = FALSE], 1, max)) + if (method == 'POI-C') { + lambda.max[d] + } else { + lambda.max[1] + } + } else { + if (requireNamespace("RSpectra", quietly = TRUE)) { + vec <- RSpectra::eigs_sym(A, d)$vectors + } else { + vec <- eigen(A, symmetric = TRUE)$vectors + } + if (method == 'FastPOI-C') { + sqrt(max(rowSums(vec^2))) + } else { # 'FastPOI-L' + max(abs(vec)) + } + } +} + #' Penalysed Orthogonal Iteration. #' #' @param lambda Default: 0.75 * lambda_max for FastPOI-C method. @@ -6,73 +43,171 @@ #' dyn.load('../tensor_predictors/poi.so') #' #' @export -POI <- function(A, B, d, - lambda = 0.75 * sqrt(max(rowSums(Delta^2))), - update.tol = 1e-3, - tol = 100 * .Machine$double.eps, - maxit = 400L, - # maxit.outer = maxit, - maxit.inner = maxit, - use.C = FALSE, - method = 'FastPOI-C') { +POI <- function(A, B, d = 1L, sparsity = 0.5, + method = c('POI-C', 'FastPOI-C'), # TODO: Maybe implement the the lasso loss too + iter.outer = 100L, iter.inner = 500L, + tol = sqrt(.Machine$double.eps) +) { + method <- match.arg(method) - # TODO: - stopifnot(method == 'FastPOI-C') + # Compute penalty parameter lambda + lambda <- sparsity * POI.lambda.max(A, d, method) - if (requireNamespace("RSpectra", quietly = TRUE)) { + # Ensure RHS B to be positive definite + if (missing(B)) { + B <- diag(nrow(A)) + } else { + rankB <- qr(B, tol)$rank + if (rankB < nrow(B)) { + diag(B) <- diag(B) + log(nrow(B)) / rankB + } + } + + # In case of zero penalty compute ordinary GEP solution + if (lambda == 0) { + eig <- solve.eig(A, B, d) + return(structure(list( + U = eig$vectors, d = eig$values, + lambda = 0, call = match.call() + ), class = c("tensor_predictor", "POI"))) + } + + # Set initial values + if (requireNamespace("RSpectra", quietly = TRUE) && nrow(A) < d) { Delta <- RSpectra::eigs_sym(A, d)$vectors } else { Delta <- eigen(A, symmetric = TRUE)$vectors[, 1:d, drop = FALSE] } + Q <- Delta + Z <- matrix(0, nrow(Q), ncol(Q)) - # Set initial value. - Z <- Delta + # Outer loop (iteration) + for (i in seq_len(iter.outer)) { + Q.last <- Q # for break condition - # Step 1: Optimization. - # The "inner" optimization loop, aka repeated coordinate optimization. - if (use.C) { - Z <- .Call('FastPOI_C_sub', A, B, Delta, lambda, as.integer(maxit.inner), - PACKAGE = 'tensorPredictors') - } else { - p <- nrow(Z) - for (iter.inner in 1:maxit.inner) { - Zold <- Z - for (g in 1:p) { - a <- Delta[g, ] - B[g, ] %*% Z + B[g, g] * Z[g, ] - a_norm <- sqrt(sum(a^2)) - if (a_norm > lambda) { - Z[g, ] <- a * ((1 - lambda / a_norm) / B[g, g]) - } else { - Z[g, ] <- 0 - } + # Step 1: Solve B Z_i = A Q_{i-1} for Z_i + Delta <- crossprod(A, Q) + # Inner Loop + for (j in seq_len(iter.inner)) { + Z.last <- Z # for break condition + + traces <- Delta - B %*% Z + diag(B) * Z + Z <- traces * (pmax(1 - lambda / sqrt(rowSums(traces^2)), 0) / diag(B)) + + # Inner break condition + if (norm(Z.last - Z, 'F') < tol) { + break } - if (norm(Z - Zold, 'F') < update.tol) { - break; + } + + # Step 2: QR decomposition of Z_i = Q_i R_i. + if (d == 1L) { + Z.norm <- norm(Z, 'F') + if (Z.norm < tol) { + Q <- matrix(0, p, d) + } else { + Q <- Z / Z.norm } + } else { + # Detect zero columns. + zero.col <- colSums(abs(Z)) < tol + if (all(zero.col)) { + Q <- matrix(0, p, d) + } else if (any(zero.col)) { + Q <- matrix(0, p, d) + Q[, !zero.col] <- qr.Q(qr(Z[, !zero.col])) + } else { + Q <- qr.Q(qr(Z)) + } + } + + # In case of fast POI, only one iteration + if (startsWith(method, 'Fast')) { + break + } + if (norm(tcrossprod(Q, Q) - tcrossprod(Q.last, Q.last), 'F') < tol) { + break } } - # Step 2: QR decomposition. - if (d == 1L) { - Z_norm <- sqrt(sum(Z^2)) - if (Z_norm < tol) { - Q <- matrix(0, p, d) - } else { - Q <- Z / Z_norm - } - } else { - # Detect zero columns. - zeroColumn <- colSums(abs(Z)) < tol - if (all(zeroColumn)) { - Q <- matrix(0, p, d) - } else if (any(zeroColumn)) { - Q <- matrix(0, p, d) - Q[, !zeroColumn] <- qr.Q(qr(Z)) - } else { - Q <- qr.Q(qr(Z)) - } - } + # TODO: Finish with transformation to original solution U of + # A U = B U Lambda. - return(list(Z = Z, Q = Q, iter.inner = if (use.C) NA else iter.inner, - lambda = lambda)) + structure(list( + Z = Z, Q = Q, + lambda = lambda, + call = match.call() + ), class = c("tensor_predictor", "POI")) } + + +# POI.bak <- function(A, B, d, +# lambda = 0.75 * sqrt(max(rowSums(Delta^2))), +# update.tol = 1e-3, +# tol = 100 * .Machine$double.eps, +# maxit = 400L, +# # maxit.outer = maxit, +# maxit.inner = maxit, +# use.C = FALSE, +# method = 'FastPOI-C') { + +# # TODO: +# stopifnot(method == 'FastPOI-C') + +# if (requireNamespace("RSpectra", quietly = TRUE)) { +# Delta <- RSpectra::eigs_sym(A, d)$vectors +# } else { +# Delta <- eigen(A, symmetric = TRUE)$vectors[, 1:d, drop = FALSE] +# } + +# # Set initial value. +# Z <- Delta + +# # Step 1: Optimization. +# # The "inner" optimization loop, aka repeated coordinate optimization. +# if (use.C) { +# Z <- .Call('FastPOI_C_sub', A, B, Delta, lambda, as.integer(maxit.inner), +# PACKAGE = 'tensorPredictors') +# } else { +# p <- nrow(Z) +# for (iter.inner in 1:maxit.inner) { +# Zold <- Z +# for (g in 1:p) { +# a <- Delta[g, ] - B[g, ] %*% Z + B[g, g] * Z[g, ] +# a_norm <- sqrt(sum(a^2)) +# if (a_norm > lambda) { +# Z[g, ] <- a * ((1 - lambda / a_norm) / B[g, g]) +# } else { +# Z[g, ] <- 0 +# } +# } +# if (norm(Z - Zold, 'F') < update.tol) { +# break +# } +# } +# } + +# # Step 2: QR decomposition. +# if (d == 1L) { +# Z_norm <- sqrt(sum(Z^2)) +# if (Z_norm < tol) { +# Q <- matrix(0, p, d) +# } else { +# Q <- Z / Z_norm +# } +# } else { +# # Detect zero columns. +# zeroColumn <- colSums(abs(Z)) < tol +# if (all(zeroColumn)) { +# Q <- matrix(0, p, d) +# } else if (any(zeroColumn)) { +# Q <- matrix(0, p, d) +# Q[, !zeroColumn] <- qr.Q(qr(Z)) +# } else { +# Q <- qr.Q(qr(Z)) +# } +# } + +# list(Z = Z, Q = Q, iter.inner = if (use.C) NA else iter.inner, +# lambda = lambda) +# }