diff --git a/tensorPredictors/NAMESPACE b/tensorPredictors/NAMESPACE index 2706559..4db235d 100644 --- a/tensorPredictors/NAMESPACE +++ b/tensorPredictors/NAMESPACE @@ -1,9 +1,11 @@ # Generated by roxygen2: do not edit by hand +export(CISE) export(LSIR) export(PCA2d) export(POI) export(approx.kronecker) +export(dist.subspace) export(reduce) import(stats) useDynLib(tensorPredictors, .registration = TRUE) diff --git a/tensorPredictors/R/CISE.R b/tensorPredictors/R/CISE.R new file mode 100644 index 0000000..5648617 --- /dev/null +++ b/tensorPredictors/R/CISE.R @@ -0,0 +1,162 @@ + +#' Coordinate-Independent Sparce Estimation. +#' +#' Solves penalized version of a GEP (Generalized Eigenvalue Problem) +#' \deqn{M V = N \Lambda V} +#' with \eqn{\Lambda} a matrix with eigenvalues on the main diagonal and \eqn{V} +#' are the first \eqn{d} eigenvectors. +#' +#' TODO: DOES NOT WORK, DON'T KNOW WHY (contact first author for the Matlab code) +#' +#' @param M is the GEP's left hand side +#' @param N is the GEP's right hand side +#' @param d number of leading eigenvalues, -vectors to be computed +#' @param max.iter maximum number of iterations for iterative optimization +#' @param Theta Penalty parameter, if not provided an reasonable estimate for +#' a grid of parameters is computed. If Theta is a vector (or number), the +#' provided values of Theta are used as penalty parameter candidates. +#' @param tol.norm numerical tolerance for dropping rows +#' @param tol.break break condition tolerance +#' +#' @returns a list +#' +#' @examples \dontrun{ +#' # Study 1-4 from CISE paper +#' dataset <- function(name, n = 60, p = 24) { +#' name <- toupper(name) +#' if (!startsWith('M', name)) { name <- paste0('M', name) } +#' +#' if (name %in% c('M1', 'M2', 'M3')) { +#' Sigma <- 0.5^abs(outer(1:p, 1:p, `-`)) +#' X <- rmvnorm(n, sigma = Sigma) +#' y <- switch(name, +#' M1 = rowSums(X[, 1:3]) + rnorm(n, 0, 0.5), +#' M2 = rowSums(X[, 1:3]) + rnorm(n, 0, 2), +#' M3 = X[, 1] / (0.5 + (X[, 2] + 1.5)^2) + rnorm(n, 0, 0.2) +#' ) +#' B <- switch(name, +#' M1 = as.matrix(as.double(1:p < 4)), +#' M2 = as.matrix(as.double(1:p < 4)), +#' M3 = diag(1, p, 2) +#' ) +#' } else if (name == 'M4') { +#' y <- rnorm(n) +#' Delta <- 0.5^abs(outer(1:p, 1:p, `-`)) +#' Gamma <- 0.5 * cbind( +#' (1:p <= 4), +#' (-(1:p <= 4))^(1:p + 1) +#' ) +#' B <- qr.Q(qr(solve(Delta, Gamma))) +#' X <- cbind(y, y^2) %*% t(Gamma) + +#' matpow(Delta, 0.5) %*% rmvnorm(n, rep(0, p)) +#' } else { +#' stop('Unknown dataset name.') +#' } +#' +#' list(X = X, y = y, B = B) +#' } +#' # Sample dataset +#' n <- 1000 +#' ds <- dataset(3, n = n) +#' # Convert to PFC associated GEP +#' Fy <- with(ds, cbind(abs(y), y, y^2)) +#' P.Fy <- Fy %*% solve(crossprod(Fy), t(Fy)) +#' M <- with(ds, crossprod(X, P.Fy %*% X) / nrow(X)) # Sigma Fit +#' N <- cov(ds$X) # Sigma +#' +#' fits <- CISE(M, N, d = ncol(ds$B), Theta = log(seq(1, exp(1e-3), len = 1000))) +#' +#' BIC <- unlist(Map(attr, fits, 'BIC')) +#' df <- unlist(Map(attr, fits, 'df')) +#' dist <- unlist(Map(attr, fits, 'dist')) +#' iter <- unlist(Map(attr, fits, 'iter')) +#' theta <- unlist(Map(attr, fits, 'theta')) +#' p.theta <- unlist(Map(function(V) sum(rowSums(V^2) > 1e-9), fits)) +#' +#' par(mfrow = c(2, 2)) +#' plot(theta, BIC, type = 'l') +#' plot(theta, p.theta, type = 'l') +#' plot(theta, dist, type = 'l') +#' plot(theta, iter, type = 'l') +#' } +#' +#' @seealso "Coordinate-Independent Sparse Sufficient Dimension +#' Reduction and Variable Selection" By Xin Chen, Changliang Zou and +#' R. Dennis Cook. +#' +#' @suggest RSpectra +#' +#' @export +CISE <- function(M, N, d = 1L, max.iter = 100L, Theta = NULL, + tol.norm = 1e-6, tol.break = 1e-3, r = 0.5 +) { + + isrN <- matpow(N, -0.5) # N^-1/2 ... Inverse Square-Root of N + G <- isrN %*% M %*% isrN # G = N^-1/2 M N^-1/2 + + # Step 1: Solve (ordinary, unconstraint) eigenvalue problem used as an + # initial value for following iterative optimization (Solution of (2.8)) + Gamma <- if (requireNamespace("RSpectra", quietly = TRUE)) { + RSpectra::eigs_sym(G, d)$vectors + } else { + eigen(G, symmetric = TRUE)$vectors[, d, drop = FALSE] + } + V.init <- isrN %*% Gamma + + # Build penalty grid + if (missing(Theta)) { + # TODO: figure out what a good min to max in steps grid is + theta.max <- sqrt(max(rowSums(Gamma^2))) + Theta <- seq(0.01 * theta.max, 0.75 * theta.max, length.out = 10) + } + + norms <- sqrt(rowSums(V.init^2)) # row norms of V + theta.vec <- 0.5 * ifelse(norms < tol.norm, 0, norms^(-r)) + + # For each penalty candidate + fits <- lapply(Theta, function(theta) { + # Step 2: Iteratively optimize constraint GEP + V <- V.init + dropped <- norms < tol.norm + for (iter in seq_len(max.iter)) { + # Approx. penalty term derivative at current position + norms <- sqrt(rowSums(V^2)) # row norms of V + dropped <- dropped | (norms < tol.norm) + h <- ifelse(dropped, 0, theta * (theta.vec / norms)) + + # Updated G at current position (scaling by 1/2 done in `theta.vec`) + A <- G - (isrN %*% (h * isrN)) + A[dropped, dropped] <- 0 + # Solve next iteration GEP + Gamma <- if (requireNamespace("RSpectra", quietly = TRUE)) { + RSpectra::eigs_sym(A, d)$vectors + } else { + eigen(A, symmetric = TRUE)$vectors[, d, drop = FALSE] + } + V.last <- V + V <- isrN %*% Gamma + V[dropped, ] <- 0 + + # break condition + if (dist.subspace(V.last, V, normalize = TRUE) < tol.break) { + break + } + } + + # df <- (sum(!dropped) - d) * d + # BIC <- -sum(V * (M %*% V)) + log(n) * df / n + # cat("theta:", sprintf('%7.3f', range(theta)), + # "- iter:", sprintf('%3d', iter), + # "- df:", sprintf('%3d', df), + # "- BIC:", sprintf('%7.3f', BIC), + # "- dist:", sprintf('%7.3f', dist.subspace(V.last, V, normalize = TRUE)), + # # "- ", paste(sprintf('%6.2f', norms), collapse = ", "), + # '\n') + + structure(V, + theta = theta, iter = iter, BIC = BIC, df = df, + dist = dist.subspace(V.last, V, normalize = TRUE)) + }) + + structure(fits, class = c("tensorPredictors", "CISE")) +} diff --git a/tensorPredictors/R/lsir.R b/tensorPredictors/R/LSIR.R similarity index 100% rename from tensorPredictors/R/lsir.R rename to tensorPredictors/R/LSIR.R diff --git a/tensorPredictors/R/pca2d.R b/tensorPredictors/R/PCA2D.R similarity index 100% rename from tensorPredictors/R/pca2d.R rename to tensorPredictors/R/PCA2D.R diff --git a/tensorPredictors/R/poi.R b/tensorPredictors/R/POI.R similarity index 98% rename from tensorPredictors/R/poi.R rename to tensorPredictors/R/POI.R index cabbc00..085d55a 100644 --- a/tensorPredictors/R/poi.R +++ b/tensorPredictors/R/POI.R @@ -11,7 +11,7 @@ POI <- function(A, B, d, update.tol = 1e-3, tol = 100 * .Machine$double.eps, maxit = 400L, - maxit.outer = maxit, + # maxit.outer = maxit, maxit.inner = maxit, use.C = FALSE, method = 'FastPOI-C') { diff --git a/tensorPredictors/R/dist_subspace.R b/tensorPredictors/R/dist_subspace.R new file mode 100644 index 0000000..cb4dc6c --- /dev/null +++ b/tensorPredictors/R/dist_subspace.R @@ -0,0 +1,37 @@ +#' Subspace distance +#' +#' @param A,B Basis matrices as representations of elements of the Grassmann +#' manifold. +#' @param is.ortho Boolean to specify if \eqn{A} and \eqn{B} are semi-orthogonal. +#' If false, the projection matrices are computed as +#' \deqn{P_A = A (A' A)^{-1} A'} +#' otherwise just \eqn{P_A = A A'} since \eqn{A' A} is the identity. +#' @param normalize Boolean to specify if the distance shall be normalized. +#' Meaning, the maximal distance scaled to be \eqn{1} independent of dimensions. +#' +#' @seealso +#' K. Ye and L.-H. Lim (2016) "Schubert varieties and distances between +#' subspaces of different dimensions" +#' +#' @export +dist.subspace <- function (A, B, is.ortho = FALSE, normalize = FALSE) { + if (!is.matrix(A)) A <- as.matrix(A) + if (!is.matrix(B)) B <- as.matrix(B) + + if (is.ortho) { + PA <- tcrossprod(A, A) + PB <- tcrossprod(B, B) + } else { + PA <- A %*% solve(t(A) %*% A, t(A)) + PB <- B %*% solve(t(B) %*% B, t(B)) + } + + if (normalize) { + rankSum <- ncol(A) + ncol(B) + c <- 1 / sqrt(min(rankSum, 2 * nrow(A) - rankSum)) + } else { + c <- sqrt(2) + } + + c * norm(PA - PB, type = "F") +} diff --git a/tensorPredictors/R/subspace.R b/tensorPredictors/R/subspace.R deleted file mode 100644 index 7ed8655..0000000 --- a/tensorPredictors/R/subspace.R +++ /dev/null @@ -1,35 +0,0 @@ -#' Angle between two subspaces -#' -#' Computes the principal angle between two subspaces spaned by the columns of -#' the matrices \code{A} and \code{B}. -#' -#' @param A,B Numeric matrices with column considered as the subspace spanning -#' vectors. Both must have the same number of rows (a.k.a must live in the -#' same space). -#' @param is.orth boolean determining if passed matrices A, B are allready -#' orthogonalized. If set to TRUE, A and B are assumed to have orthogonal -#' columns (which is not checked). -#' -#' @returns angle in radiants. -#' -subspace <- function(A, B, is.orth = FALSE) { - if (!is.numeric(A) || !is.numeric(B)) { - stop("Arguments 'A' and 'B' must be numeric.") - } - if (is.vector(A)) A <- as.matrix(A) - if (is.vector(B)) B <- as.matrix(B) - if (nrow(A) != nrow(B)) { - stop("Matrices 'A' and 'B' must have the same number of rows.") - } - if (!is.orth) { - A <- qr.Q(qr(A)) - B <- qr.Q(qr(B)) - } - if (ncol(A) < ncol(B)) { - tmp <- A; A <- B; B <- tmp - } - for (k in 1:ncol(A)) { - B <- B - tcrossprod(A[, k]) %*% B - } - asin(min(1, La.svd(B, 0L, 0L)$d)) -}