wip: implementing CISE
This commit is contained in:
parent
db550c77bd
commit
1db9b15fdb
|
@ -1,9 +1,11 @@
|
||||||
# Generated by roxygen2: do not edit by hand
|
# Generated by roxygen2: do not edit by hand
|
||||||
|
|
||||||
|
export(CISE)
|
||||||
export(LSIR)
|
export(LSIR)
|
||||||
export(PCA2d)
|
export(PCA2d)
|
||||||
export(POI)
|
export(POI)
|
||||||
export(approx.kronecker)
|
export(approx.kronecker)
|
||||||
|
export(dist.subspace)
|
||||||
export(reduce)
|
export(reduce)
|
||||||
import(stats)
|
import(stats)
|
||||||
useDynLib(tensorPredictors, .registration = TRUE)
|
useDynLib(tensorPredictors, .registration = TRUE)
|
||||||
|
|
|
@ -0,0 +1,162 @@
|
||||||
|
|
||||||
|
#' Coordinate-Independent Sparce Estimation.
|
||||||
|
#'
|
||||||
|
#' Solves penalized version of a GEP (Generalized Eigenvalue Problem)
|
||||||
|
#' \deqn{M V = N \Lambda V}
|
||||||
|
#' with \eqn{\Lambda} a matrix with eigenvalues on the main diagonal and \eqn{V}
|
||||||
|
#' are the first \eqn{d} eigenvectors.
|
||||||
|
#'
|
||||||
|
#' TODO: DOES NOT WORK, DON'T KNOW WHY (contact first author for the Matlab code)
|
||||||
|
#'
|
||||||
|
#' @param M is the GEP's left hand side
|
||||||
|
#' @param N is the GEP's right hand side
|
||||||
|
#' @param d number of leading eigenvalues, -vectors to be computed
|
||||||
|
#' @param max.iter maximum number of iterations for iterative optimization
|
||||||
|
#' @param Theta Penalty parameter, if not provided an reasonable estimate for
|
||||||
|
#' a grid of parameters is computed. If Theta is a vector (or number), the
|
||||||
|
#' provided values of Theta are used as penalty parameter candidates.
|
||||||
|
#' @param tol.norm numerical tolerance for dropping rows
|
||||||
|
#' @param tol.break break condition tolerance
|
||||||
|
#'
|
||||||
|
#' @returns a list
|
||||||
|
#'
|
||||||
|
#' @examples \dontrun{
|
||||||
|
#' # Study 1-4 from CISE paper
|
||||||
|
#' dataset <- function(name, n = 60, p = 24) {
|
||||||
|
#' name <- toupper(name)
|
||||||
|
#' if (!startsWith('M', name)) { name <- paste0('M', name) }
|
||||||
|
#'
|
||||||
|
#' if (name %in% c('M1', 'M2', 'M3')) {
|
||||||
|
#' Sigma <- 0.5^abs(outer(1:p, 1:p, `-`))
|
||||||
|
#' X <- rmvnorm(n, sigma = Sigma)
|
||||||
|
#' y <- switch(name,
|
||||||
|
#' M1 = rowSums(X[, 1:3]) + rnorm(n, 0, 0.5),
|
||||||
|
#' M2 = rowSums(X[, 1:3]) + rnorm(n, 0, 2),
|
||||||
|
#' M3 = X[, 1] / (0.5 + (X[, 2] + 1.5)^2) + rnorm(n, 0, 0.2)
|
||||||
|
#' )
|
||||||
|
#' B <- switch(name,
|
||||||
|
#' M1 = as.matrix(as.double(1:p < 4)),
|
||||||
|
#' M2 = as.matrix(as.double(1:p < 4)),
|
||||||
|
#' M3 = diag(1, p, 2)
|
||||||
|
#' )
|
||||||
|
#' } else if (name == 'M4') {
|
||||||
|
#' y <- rnorm(n)
|
||||||
|
#' Delta <- 0.5^abs(outer(1:p, 1:p, `-`))
|
||||||
|
#' Gamma <- 0.5 * cbind(
|
||||||
|
#' (1:p <= 4),
|
||||||
|
#' (-(1:p <= 4))^(1:p + 1)
|
||||||
|
#' )
|
||||||
|
#' B <- qr.Q(qr(solve(Delta, Gamma)))
|
||||||
|
#' X <- cbind(y, y^2) %*% t(Gamma) +
|
||||||
|
#' matpow(Delta, 0.5) %*% rmvnorm(n, rep(0, p))
|
||||||
|
#' } else {
|
||||||
|
#' stop('Unknown dataset name.')
|
||||||
|
#' }
|
||||||
|
#'
|
||||||
|
#' list(X = X, y = y, B = B)
|
||||||
|
#' }
|
||||||
|
#' # Sample dataset
|
||||||
|
#' n <- 1000
|
||||||
|
#' ds <- dataset(3, n = n)
|
||||||
|
#' # Convert to PFC associated GEP
|
||||||
|
#' Fy <- with(ds, cbind(abs(y), y, y^2))
|
||||||
|
#' P.Fy <- Fy %*% solve(crossprod(Fy), t(Fy))
|
||||||
|
#' M <- with(ds, crossprod(X, P.Fy %*% X) / nrow(X)) # Sigma Fit
|
||||||
|
#' N <- cov(ds$X) # Sigma
|
||||||
|
#'
|
||||||
|
#' fits <- CISE(M, N, d = ncol(ds$B), Theta = log(seq(1, exp(1e-3), len = 1000)))
|
||||||
|
#'
|
||||||
|
#' BIC <- unlist(Map(attr, fits, 'BIC'))
|
||||||
|
#' df <- unlist(Map(attr, fits, 'df'))
|
||||||
|
#' dist <- unlist(Map(attr, fits, 'dist'))
|
||||||
|
#' iter <- unlist(Map(attr, fits, 'iter'))
|
||||||
|
#' theta <- unlist(Map(attr, fits, 'theta'))
|
||||||
|
#' p.theta <- unlist(Map(function(V) sum(rowSums(V^2) > 1e-9), fits))
|
||||||
|
#'
|
||||||
|
#' par(mfrow = c(2, 2))
|
||||||
|
#' plot(theta, BIC, type = 'l')
|
||||||
|
#' plot(theta, p.theta, type = 'l')
|
||||||
|
#' plot(theta, dist, type = 'l')
|
||||||
|
#' plot(theta, iter, type = 'l')
|
||||||
|
#' }
|
||||||
|
#'
|
||||||
|
#' @seealso "Coordinate-Independent Sparse Sufficient Dimension
|
||||||
|
#' Reduction and Variable Selection" By Xin Chen, Changliang Zou and
|
||||||
|
#' R. Dennis Cook.
|
||||||
|
#'
|
||||||
|
#' @suggest RSpectra
|
||||||
|
#'
|
||||||
|
#' @export
|
||||||
|
CISE <- function(M, N, d = 1L, max.iter = 100L, Theta = NULL,
|
||||||
|
tol.norm = 1e-6, tol.break = 1e-3, r = 0.5
|
||||||
|
) {
|
||||||
|
|
||||||
|
isrN <- matpow(N, -0.5) # N^-1/2 ... Inverse Square-Root of N
|
||||||
|
G <- isrN %*% M %*% isrN # G = N^-1/2 M N^-1/2
|
||||||
|
|
||||||
|
# Step 1: Solve (ordinary, unconstraint) eigenvalue problem used as an
|
||||||
|
# initial value for following iterative optimization (Solution of (2.8))
|
||||||
|
Gamma <- if (requireNamespace("RSpectra", quietly = TRUE)) {
|
||||||
|
RSpectra::eigs_sym(G, d)$vectors
|
||||||
|
} else {
|
||||||
|
eigen(G, symmetric = TRUE)$vectors[, d, drop = FALSE]
|
||||||
|
}
|
||||||
|
V.init <- isrN %*% Gamma
|
||||||
|
|
||||||
|
# Build penalty grid
|
||||||
|
if (missing(Theta)) {
|
||||||
|
# TODO: figure out what a good min to max in steps grid is
|
||||||
|
theta.max <- sqrt(max(rowSums(Gamma^2)))
|
||||||
|
Theta <- seq(0.01 * theta.max, 0.75 * theta.max, length.out = 10)
|
||||||
|
}
|
||||||
|
|
||||||
|
norms <- sqrt(rowSums(V.init^2)) # row norms of V
|
||||||
|
theta.vec <- 0.5 * ifelse(norms < tol.norm, 0, norms^(-r))
|
||||||
|
|
||||||
|
# For each penalty candidate
|
||||||
|
fits <- lapply(Theta, function(theta) {
|
||||||
|
# Step 2: Iteratively optimize constraint GEP
|
||||||
|
V <- V.init
|
||||||
|
dropped <- norms < tol.norm
|
||||||
|
for (iter in seq_len(max.iter)) {
|
||||||
|
# Approx. penalty term derivative at current position
|
||||||
|
norms <- sqrt(rowSums(V^2)) # row norms of V
|
||||||
|
dropped <- dropped | (norms < tol.norm)
|
||||||
|
h <- ifelse(dropped, 0, theta * (theta.vec / norms))
|
||||||
|
|
||||||
|
# Updated G at current position (scaling by 1/2 done in `theta.vec`)
|
||||||
|
A <- G - (isrN %*% (h * isrN))
|
||||||
|
A[dropped, dropped] <- 0
|
||||||
|
# Solve next iteration GEP
|
||||||
|
Gamma <- if (requireNamespace("RSpectra", quietly = TRUE)) {
|
||||||
|
RSpectra::eigs_sym(A, d)$vectors
|
||||||
|
} else {
|
||||||
|
eigen(A, symmetric = TRUE)$vectors[, d, drop = FALSE]
|
||||||
|
}
|
||||||
|
V.last <- V
|
||||||
|
V <- isrN %*% Gamma
|
||||||
|
V[dropped, ] <- 0
|
||||||
|
|
||||||
|
# break condition
|
||||||
|
if (dist.subspace(V.last, V, normalize = TRUE) < tol.break) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# df <- (sum(!dropped) - d) * d
|
||||||
|
# BIC <- -sum(V * (M %*% V)) + log(n) * df / n
|
||||||
|
# cat("theta:", sprintf('%7.3f', range(theta)),
|
||||||
|
# "- iter:", sprintf('%3d', iter),
|
||||||
|
# "- df:", sprintf('%3d', df),
|
||||||
|
# "- BIC:", sprintf('%7.3f', BIC),
|
||||||
|
# "- dist:", sprintf('%7.3f', dist.subspace(V.last, V, normalize = TRUE)),
|
||||||
|
# # "- ", paste(sprintf('%6.2f', norms), collapse = ", "),
|
||||||
|
# '\n')
|
||||||
|
|
||||||
|
structure(V,
|
||||||
|
theta = theta, iter = iter, BIC = BIC, df = df,
|
||||||
|
dist = dist.subspace(V.last, V, normalize = TRUE))
|
||||||
|
})
|
||||||
|
|
||||||
|
structure(fits, class = c("tensorPredictors", "CISE"))
|
||||||
|
}
|
|
@ -11,7 +11,7 @@ POI <- function(A, B, d,
|
||||||
update.tol = 1e-3,
|
update.tol = 1e-3,
|
||||||
tol = 100 * .Machine$double.eps,
|
tol = 100 * .Machine$double.eps,
|
||||||
maxit = 400L,
|
maxit = 400L,
|
||||||
maxit.outer = maxit,
|
# maxit.outer = maxit,
|
||||||
maxit.inner = maxit,
|
maxit.inner = maxit,
|
||||||
use.C = FALSE,
|
use.C = FALSE,
|
||||||
method = 'FastPOI-C') {
|
method = 'FastPOI-C') {
|
|
@ -0,0 +1,37 @@
|
||||||
|
#' Subspace distance
|
||||||
|
#'
|
||||||
|
#' @param A,B Basis matrices as representations of elements of the Grassmann
|
||||||
|
#' manifold.
|
||||||
|
#' @param is.ortho Boolean to specify if \eqn{A} and \eqn{B} are semi-orthogonal.
|
||||||
|
#' If false, the projection matrices are computed as
|
||||||
|
#' \deqn{P_A = A (A' A)^{-1} A'}
|
||||||
|
#' otherwise just \eqn{P_A = A A'} since \eqn{A' A} is the identity.
|
||||||
|
#' @param normalize Boolean to specify if the distance shall be normalized.
|
||||||
|
#' Meaning, the maximal distance scaled to be \eqn{1} independent of dimensions.
|
||||||
|
#'
|
||||||
|
#' @seealso
|
||||||
|
#' K. Ye and L.-H. Lim (2016) "Schubert varieties and distances between
|
||||||
|
#' subspaces of different dimensions" <arXiv:1407.0900>
|
||||||
|
#'
|
||||||
|
#' @export
|
||||||
|
dist.subspace <- function (A, B, is.ortho = FALSE, normalize = FALSE) {
|
||||||
|
if (!is.matrix(A)) A <- as.matrix(A)
|
||||||
|
if (!is.matrix(B)) B <- as.matrix(B)
|
||||||
|
|
||||||
|
if (is.ortho) {
|
||||||
|
PA <- tcrossprod(A, A)
|
||||||
|
PB <- tcrossprod(B, B)
|
||||||
|
} else {
|
||||||
|
PA <- A %*% solve(t(A) %*% A, t(A))
|
||||||
|
PB <- B %*% solve(t(B) %*% B, t(B))
|
||||||
|
}
|
||||||
|
|
||||||
|
if (normalize) {
|
||||||
|
rankSum <- ncol(A) + ncol(B)
|
||||||
|
c <- 1 / sqrt(min(rankSum, 2 * nrow(A) - rankSum))
|
||||||
|
} else {
|
||||||
|
c <- sqrt(2)
|
||||||
|
}
|
||||||
|
|
||||||
|
c * norm(PA - PB, type = "F")
|
||||||
|
}
|
|
@ -1,35 +0,0 @@
|
||||||
#' Angle between two subspaces
|
|
||||||
#'
|
|
||||||
#' Computes the principal angle between two subspaces spaned by the columns of
|
|
||||||
#' the matrices \code{A} and \code{B}.
|
|
||||||
#'
|
|
||||||
#' @param A,B Numeric matrices with column considered as the subspace spanning
|
|
||||||
#' vectors. Both must have the same number of rows (a.k.a must live in the
|
|
||||||
#' same space).
|
|
||||||
#' @param is.orth boolean determining if passed matrices A, B are allready
|
|
||||||
#' orthogonalized. If set to TRUE, A and B are assumed to have orthogonal
|
|
||||||
#' columns (which is not checked).
|
|
||||||
#'
|
|
||||||
#' @returns angle in radiants.
|
|
||||||
#'
|
|
||||||
subspace <- function(A, B, is.orth = FALSE) {
|
|
||||||
if (!is.numeric(A) || !is.numeric(B)) {
|
|
||||||
stop("Arguments 'A' and 'B' must be numeric.")
|
|
||||||
}
|
|
||||||
if (is.vector(A)) A <- as.matrix(A)
|
|
||||||
if (is.vector(B)) B <- as.matrix(B)
|
|
||||||
if (nrow(A) != nrow(B)) {
|
|
||||||
stop("Matrices 'A' and 'B' must have the same number of rows.")
|
|
||||||
}
|
|
||||||
if (!is.orth) {
|
|
||||||
A <- qr.Q(qr(A))
|
|
||||||
B <- qr.Q(qr(B))
|
|
||||||
}
|
|
||||||
if (ncol(A) < ncol(B)) {
|
|
||||||
tmp <- A; A <- B; B <- tmp
|
|
||||||
}
|
|
||||||
for (k in 1:ncol(A)) {
|
|
||||||
B <- B - tcrossprod(A[, k]) %*% B
|
|
||||||
}
|
|
||||||
asin(min(1, La.svd(B, 0L, 0L)$d))
|
|
||||||
}
|
|
Loading…
Reference in New Issue