wip: POI
parent 1db9b15fdb
commit b0151dfafb
@@ -87,8 +87,8 @@
 #' @suggest RSpectra
 #'
 #' @export
-CISE <- function(M, N, d = 1L, max.iter = 100L, Theta = NULL,
-                 tol.norm = 1e-6, tol.break = 1e-3, r = 0.5
+CISE <- function(M, N, d = 1L, method = "PFC", max.iter = 100L, Theta = NULL,
+                 tol.norm = 1e-6, tol.break = 1e-6, r = 0.5
 ) {

     isrN <- matpow(N, -0.5) # N^-1/2 ... Inverse Square-Root of N
@@ -111,22 +111,28 @@ CISE <- function(M, N, d = 1L, max.iter = 100L, Theta = NULL,
     }

     norms <- sqrt(rowSums(V.init^2)) # row norms of V
-    theta.vec <- 0.5 * ifelse(norms < tol.norm, 0, norms^(-r))
+    theta.scale <- 0.5 * ifelse(norms < tol.norm, 0, norms^(-r))

     # For each penalty candidate
     fits <- lapply(Theta, function(theta) {
         # Step 2: Iteratively optimize constraint GEP
         V <- V.init
-        dropped <- norms < tol.norm
+        dropped <- rep(FALSE, nrow(M)) # Keep track of dropped variables
         for (iter in seq_len(max.iter)) {
-            # Approx. penalty term derivative at current position
+            # Compute current row norms
             norms <- sqrt(rowSums(V^2)) # row norms of V
-            dropped <- dropped | (norms < tol.norm)
-            h <- ifelse(dropped, 0, theta * (theta.vec / norms))
+            # Check if variables are dropped. If so, update dropped and
+            # recompute the inverse square root of N
+            if (any(norms < tol.norm)) {
+                dropped[!dropped] <- norms < tol.norm
+                norms <- norms[!(norms < tol.norm)]
+                isrN <- matpow(N[!dropped, !dropped], -0.5)
+            }
+            # Approx. penalty term derivative at current position
+            h <- theta * (theta.scale[!dropped] / norms)

-            # Updated G at current position (scaling by 1/2 done in `theta.vec`)
-            A <- G - (isrN %*% (h * isrN))
-            A[dropped, dropped] <- 0
+            # Updated G at current position (scaling by 1/2 done in `theta.scale`)
+            A <- G[!dropped, !dropped] - (isrN %*% (h * isrN))
             # Solve next iteration GEP
             Gamma <- if (requireNamespace("RSpectra", quietly = TRUE)) {
                 RSpectra::eigs_sym(A, d)$vectors
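For reference, the adaptive penalty weights h built in the changed lines above can be computed in isolation. The sketch below is illustrative only: all numbers are made up, and only the formulas mirror the diff.

    theta <- 0.1; r <- 0.5; tol.norm <- 1e-6
    norms.init <- c(2.0, 0.5, 1e-9)             # initial row norms of V.init
    theta.scale <- 0.5 * ifelse(norms.init < tol.norm, 0, norms.init^(-r))
    dropped <- norms.init < tol.norm            # third variable is dropped
    norms <- c(1.8, 0.4)                        # current row norms of the kept rows
    h <- theta * (theta.scale[!dropped] / norms) # penalty weights used in the A update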
@@ -135,14 +141,22 @@ CISE <- function(M, N, d = 1L, max.iter = 100L, Theta = NULL,
             }
             V.last <- V
             V <- isrN %*% Gamma
-            V[dropped, ] <- 0

-            # break condition
-            if (dist.subspace(V.last, V, normalize = TRUE) < tol.break) {
+            # Check if there are enough variables left
+            if (nrow(V) < d + 1) {
+                break
+            }
+            # Break condition (only when nothing dropped)
+            if (nrow(V.last) == nrow(V)
+                && dist.subspace(V.last, V, normalize = TRUE) < tol.break) {
                 break
             }
         }

+        # Recreate dropped variables and fill parameters with 0.
+        V.full <- matrix(0, nrow(M), d)
+        V.full[!dropped, ] <- V
+
         # df <- (sum(!dropped) - d) * d
         # BIC <- -sum(V * (M %*% V)) + log(n) * df / n
         # cat("theta:", sprintf('%7.3f', range(theta)),
@@ -153,10 +167,12 @@ CISE <- function(M, N, d = 1L, max.iter = 100L, Theta = NULL,
         # # "- ", paste(sprintf('%6.2f', norms), collapse = ", "),
         # '\n')

-        structure(V,
+        structure(qr.Q(qr(V.full)),
             theta = theta, iter = iter, BIC = BIC, df = df,
             dist = dist.subspace(V.last, V, normalize = TRUE))
     })

-    structure(fits, class = c("tensorPredictors", "CISE"))
+    structure(fits,
+        call = match.call(),
+        class = c("tensorPredictors", "CISE"))
 }
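For reference, the final zero-padding and re-orthonormalization returned per theta can be illustrated on its own; the dimensions below are made up, and `dropped` / `V` stand in for the quantities of the same name in the diff.

    set.seed(1)
    d <- 2
    dropped <- c(FALSE, TRUE, FALSE, FALSE, TRUE)   # 2 of 5 variables dropped
    V <- qr.Q(qr(matrix(rnorm(3 * d), 3, d)))       # solution on the kept rows
    V.full <- matrix(0, length(dropped), d)
    V.full[!dropped, ] <- V
    round(crossprod(qr.Q(qr(V.full))), 12)          # identity: columns stay orthonormal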
@@ -0,0 +1,61 @@
+#' Compute eigenvalue problem equiv. to method.
+#'
+#' Computes the matrices \eqn{A, B} of a generalized eigenvalue problem
+#' \deqn{A X = B Lambda} with \eqn{X} the matrix of eigenvectors and
+#' \eqn{Lambda} diagonal matrix of eigenvalues.
+#'
+#' @param X predictor matrix.
+#' @param y responses (see details).
+#' @param method One of "pca", "pfc", ... TODO: finish, add more, ...
+#'
+#' @returns list of matrices \code{lhs} for \eqn{A} and \code{rhs} for \eqn{B}.
+#'
+#' @seealso section 2.1 of "Coordinate-Independent Sparse Sufficient Dimension
+#'   Reduction and Variable Selection" by Xin Chen, Changliang Zou and
+#'   R. Dennis Cook.
+#'
+GEP <- function(X, y, method = c('pfc', 'pca', 'sir', 'save'), ...,
+    nr.slices = 10, ensamble = list(abs, identity, function(x) x^2)
+) {
+    method <- match.arg(method)
+
+    if (method == 'pca') {
+        lhs <- cov(X)        # covariance
+        rhs <- diag(ncol(X)) # identity
+    } else if (method == 'pfc') {
+        X <- scale(X, scale = FALSE, center = TRUE)
+
+        Fy <- sapply(ensamble, do.call, list(y))
+        Fy <- scale(Fy, scale = FALSE, center = TRUE)
+
+        # Compute Sigma_fit (the sample covariance matrix of the fitted vectors).
+        P_Fy <- Fy %*% solve(crossprod(Fy), t(Fy))
+        lhs <- crossprod(X, P_Fy %*% X) / nrow(X)
+        # Estimate Sigma (the MLE sample covariance matrix).
+        rhs <- crossprod(X) / nrow(X)
+    } else if (method == 'sir') {
+        if (NCOL(y) != 1) {
+            stop('For SIR only univariate response supported.')
+        }
+        # Build slices (if not categorical)
+        if (is.factor(y)) {
+            slice.index <- y
+        } else {
+            # TODO: make this proper, just for a bit of playing!!!
+            slice.size <- round(length(y) / nr.slices)
+            slice.index <- factor((rank(y) - 1) %/% slice.size)
+        }
+
+        # Covariance of slice means, Cov(E[X - E X | y])
+        lhs <- cov(t(sapply(levels(slice.index), function(i) {
+            colMeans(X[slice.index == i, , drop = FALSE])
+        })))
+        # Sample covariance
+        rhs <- cov(X)
+    } else {
+        stop('Not implemented!')
+    }
+
+    # Return left- and right-hand-side of GEP equation system.
+    list(lhs = lhs, rhs = rhs)
+}
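A minimal usage sketch of the GEP() helper added above (simulated data; the whitening is done with base eigen() instead of the package's matpow()/solve.gep() so the snippet stays self-contained):

    set.seed(1)
    n <- 200
    y <- rnorm(n)
    X <- cbind(y, y^2, matrix(rnorm(n * 3), n, 3)) + matrix(rnorm(n * 5), n, 5)
    gep <- GEP(X, y, method = 'pfc')
    # Leading direction of the generalized eigenvalue problem lhs v = lambda rhs v
    er <- eigen(gep$rhs, symmetric = TRUE)
    isr <- er$vectors %*% diag(1 / sqrt(er$values)) %*% t(er$vectors)
    v1 <- isr %*% eigen(isr %*% gep$lhs %*% isr, symmetric = TRUE)$vectors[, 1]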
@@ -1,3 +1,40 @@
+solve.gep <- function(A, B, d = nrow(A)) {
+    isrB <- matpow(B, -0.5)
+
+    if (requireNamespace("RSpectra", quietly = TRUE) && d < nrow(A)) {
+        eig <- RSpectra::eigs_sym(isrB %*% A %*% isrB, d)
+    } else {
+        eig <- eigen(isrB %*% A %*% isrB, symmetric = TRUE)
+    }
+
+    list(vectors = isrB %*% eig$vectors, values = eig$values)
+}
+
+POI.lambda.max <- function(A, d = 1L, method = c('POI-C', 'POI-L', 'FastPOI-C', 'FastPOI-L')) {
+    method <- match.arg(method)
+
+    if (method %in% c('POI-C', 'POI-L')) {
+        A2 <- apply(apply(A^2, 2, sort, decreasing = TRUE), 2, cumsum)
+        lambda.max <- sqrt(apply(A2[1:d, , drop = FALSE], 1, max))
+        if (method == 'POI-C') {
+            lambda.max[d]
+        } else {
+            lambda.max[1]
+        }
+    } else {
+        if (requireNamespace("RSpectra", quietly = TRUE)) {
+            vec <- RSpectra::eigs_sym(A, d)$vectors
+        } else {
+            vec <- eigen(A, symmetric = TRUE)$vectors[, 1:d, drop = FALSE]
+        }
+        if (method == 'FastPOI-C') {
+            sqrt(max(rowSums(vec^2)))
+        } else { # 'FastPOI-L'
+            max(abs(vec))
+        }
+    }
+}
+
 #' Penalized Orthogonal Iteration.
 #'
 #' @param lambda Default: 0.75 * lambda_max for FastPOI-C method.
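A quick consistency check for solve.gep() (assumes the package's matpow() helper used above is available): for symmetric A and positive definite B, the returned pairs should satisfy A v = lambda B v up to numerical error.

    set.seed(1)
    p <- 6
    A <- crossprod(matrix(rnorm(p * p), p))
    B <- crossprod(matrix(rnorm(p * p), p)) + diag(p)
    sol <- solve.gep(A, B, d = 2)
    max(abs(A %*% sol$vectors - B %*% sol$vectors %*% diag(sol$values)))  # ~ 0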
@@ -6,73 +43,171 @@
 #' dyn.load('../tensor_predictors/poi.so')
 #'
 #' @export
-POI <- function(A, B, d,
-                lambda = 0.75 * sqrt(max(rowSums(Delta^2))),
-                update.tol = 1e-3,
-                tol = 100 * .Machine$double.eps,
-                maxit = 400L,
-                # maxit.outer = maxit,
-                maxit.inner = maxit,
-                use.C = FALSE,
-                method = 'FastPOI-C') {
+POI <- function(A, B, d = 1L, sparsity = 0.5,
+                method = c('POI-C', 'FastPOI-C'), # TODO: Maybe implement the lasso loss too
+                iter.outer = 100L, iter.inner = 500L,
+                tol = sqrt(.Machine$double.eps)
+) {
+    method <- match.arg(method)

-    # TODO:
-    stopifnot(method == 'FastPOI-C')
+    # Compute penalty parameter lambda
+    lambda <- sparsity * POI.lambda.max(A, d, method)

-    if (requireNamespace("RSpectra", quietly = TRUE)) {
+    # Ensure RHS B to be positive definite
+    if (missing(B)) {
+        B <- diag(nrow(A))
+    } else {
+        rankB <- qr(B, tol)$rank
+        if (rankB < nrow(B)) {
+            diag(B) <- diag(B) + log(nrow(B)) / rankB
+        }
+    }
+
+    # In case of zero penalty compute ordinary GEP solution
+    if (lambda == 0) {
+        eig <- solve.gep(A, B, d)
+        return(structure(list(
+            U = eig$vectors, d = eig$values,
+            lambda = 0, call = match.call()
+        ), class = c("tensor_predictor", "POI")))
+    }
+
+    # Set initial values
+    if (requireNamespace("RSpectra", quietly = TRUE) && d < nrow(A)) {
         Delta <- RSpectra::eigs_sym(A, d)$vectors
     } else {
         Delta <- eigen(A, symmetric = TRUE)$vectors[, 1:d, drop = FALSE]
     }
+    Q <- Delta
+    Z <- matrix(0, nrow(Q), ncol(Q))

-    # Set initial value.
-    Z <- Delta
-
-    # Step 1: Optimization.
-    # The "inner" optimization loop, aka repeated coordinate optimization.
-    if (use.C) {
-        Z <- .Call('FastPOI_C_sub', A, B, Delta, lambda, as.integer(maxit.inner),
-                   PACKAGE = 'tensorPredictors')
-    } else {
-        p <- nrow(Z)
-        for (iter.inner in 1:maxit.inner) {
-            Zold <- Z
-            for (g in 1:p) {
-                a <- Delta[g, ] - B[g, ] %*% Z + B[g, g] * Z[g, ]
-                a_norm <- sqrt(sum(a^2))
-                if (a_norm > lambda) {
-                    Z[g, ] <- a * ((1 - lambda / a_norm) / B[g, g])
-                } else {
-                    Z[g, ] <- 0
-                }
-            }
-            if (norm(Z - Zold, 'F') < update.tol) {
-                break;
-            }
-        }
-    }
-
-    # Step 2: QR decomposition.
-    if (d == 1L) {
-        Z_norm <- sqrt(sum(Z^2))
-        if (Z_norm < tol) {
-            Q <- matrix(0, p, d)
-        } else {
-            Q <- Z / Z_norm
-        }
-    } else {
-        # Detect zero columns.
-        zeroColumn <- colSums(abs(Z)) < tol
-        if (all(zeroColumn)) {
-            Q <- matrix(0, p, d)
-        } else if (any(zeroColumn)) {
-            Q <- matrix(0, p, d)
-            Q[, !zeroColumn] <- qr.Q(qr(Z))
-        } else {
-            Q <- qr.Q(qr(Z))
-        }
-    }
-
-    return(list(Z = Z, Q = Q, iter.inner = if (use.C) NA else iter.inner,
-                lambda = lambda))
+    # Outer loop (iteration)
+    for (i in seq_len(iter.outer)) {
+        Q.last <- Q # for break condition
+
+        # Step 1: Solve B Z_i = A Q_{i-1} for Z_i
+        Delta <- crossprod(A, Q)
+        # Inner Loop
+        for (j in seq_len(iter.inner)) {
+            Z.last <- Z # for break condition
+            traces <- Delta - B %*% Z + diag(B) * Z
+            Z <- traces * (pmax(1 - lambda / sqrt(rowSums(traces^2)), 0) / diag(B))
+            # Inner break condition
+            if (norm(Z.last - Z, 'F') < tol) {
+                break
+            }
+        }
+        # Step 2: QR decomposition of Z_i = Q_i R_i.
+        if (d == 1L) {
+            Z.norm <- norm(Z, 'F')
+            if (Z.norm < tol) {
+                Q <- matrix(0, nrow(Z), d)
+            } else {
+                Q <- Z / Z.norm
+            }
+        } else {
+            # Detect zero columns.
+            zero.col <- colSums(abs(Z)) < tol
+            if (all(zero.col)) {
+                Q <- matrix(0, nrow(Z), d)
+            } else if (any(zero.col)) {
+                Q <- matrix(0, nrow(Z), d)
+                Q[, !zero.col] <- qr.Q(qr(Z[, !zero.col]))
+            } else {
+                Q <- qr.Q(qr(Z))
+            }
+        }
+
+        # In case of fast POI, only one iteration
+        if (startsWith(method, 'Fast')) {
+            break
+        }
+        if (norm(tcrossprod(Q, Q) - tcrossprod(Q.last, Q.last), 'F') < tol) {
+            break
+        }
+    }
+
+    # TODO: Finish with transformation to original solution U of
+    # A U = B U Lambda.
+
+    structure(list(
+        Z = Z, Q = Q,
+        lambda = lambda,
+        call = match.call()
+    ), class = c("tensor_predictor", "POI"))
 }
+
+
+# POI.bak <- function(A, B, d,
+#                 lambda = 0.75 * sqrt(max(rowSums(Delta^2))),
+#                 update.tol = 1e-3,
+#                 tol = 100 * .Machine$double.eps,
+#                 maxit = 400L,
+#                 # maxit.outer = maxit,
+#                 maxit.inner = maxit,
+#                 use.C = FALSE,
+#                 method = 'FastPOI-C') {
+
+#     # TODO:
+#     stopifnot(method == 'FastPOI-C')
+
+#     if (requireNamespace("RSpectra", quietly = TRUE)) {
+#         Delta <- RSpectra::eigs_sym(A, d)$vectors
+#     } else {
+#         Delta <- eigen(A, symmetric = TRUE)$vectors[, 1:d, drop = FALSE]
+#     }
+
+#     # Set initial value.
+#     Z <- Delta
+
+#     # Step 1: Optimization.
+#     # The "inner" optimization loop, aka repeated coordinate optimization.
+#     if (use.C) {
+#         Z <- .Call('FastPOI_C_sub', A, B, Delta, lambda, as.integer(maxit.inner),
+#                    PACKAGE = 'tensorPredictors')
+#     } else {
+#         p <- nrow(Z)
+#         for (iter.inner in 1:maxit.inner) {
+#             Zold <- Z
+#             for (g in 1:p) {
+#                 a <- Delta[g, ] - B[g, ] %*% Z + B[g, g] * Z[g, ]
+#                 a_norm <- sqrt(sum(a^2))
+#                 if (a_norm > lambda) {
+#                     Z[g, ] <- a * ((1 - lambda / a_norm) / B[g, g])
+#                 } else {
+#                     Z[g, ] <- 0
+#                 }
+#             }
+#             if (norm(Z - Zold, 'F') < update.tol) {
+#                 break
+#             }
+#         }
+#     }
+
+#     # Step 2: QR decomposition.
+#     if (d == 1L) {
+#         Z_norm <- sqrt(sum(Z^2))
+#         if (Z_norm < tol) {
+#             Q <- matrix(0, p, d)
+#         } else {
+#             Q <- Z / Z_norm
+#         }
+#     } else {
+#         # Detect zero columns.
+#         zeroColumn <- colSums(abs(Z)) < tol
+#         if (all(zeroColumn)) {
+#             Q <- matrix(0, p, d)
+#         } else if (any(zeroColumn)) {
+#             Q <- matrix(0, p, d)
+#             Q[, !zeroColumn] <- qr.Q(qr(Z))
+#         } else {
+#             Q <- qr.Q(qr(Z))
+#         }
+#     }
+
+#     list(Z = Z, Q = Q, iter.inner = if (use.C) NA else iter.inner,
+#         lambda = lambda)
+# }
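A minimal usage sketch of the new POI() interface (illustrative only; how many rows are zeroed depends on the sparsity level and the resulting lambda):

    set.seed(1)
    p <- 20
    A <- crossprod(matrix(rnorm(5 * p), 5, p))   # symmetric, low rank
    fit <- POI(A, diag(p), d = 1L, sparsity = 0.5, method = 'FastPOI-C')
    fit$lambda
    sum(rowSums(fit$Q^2) == 0)                   # rows removed by the group soft-threshold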