CVE/CVE_R/R/cve_rcg.R

#' Implementation of the CVE method as a Riemannian Conjugate Gradient method.
#'
#' @references A Riemannian Conjugate Gradient Algorithm with Implicit Vector
#'      Transport for Optimization on the Stiefel Manifold
#' @keywords internal
#' @export
cve_rcg <- function(X, Y, k,
                    nObs = sqrt(nrow(X)),
                    h = NULL,
                    tau = 1.0,
                    tol = 1e-4,
                    rho = 1e-4, # For the Armijo condition.
                    slack = 0,
                    epochs = 50L,
                    attempts = 10L,
                    max.linesearch.iter = 20L,
                    logger = NULL
) {
    # Set the environment of `grad` to this function's environment so that
    # `grad` can find (and manipulate) the local variables defined here.
    environment(grad) <- environment()
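    # A minimal sketch of this environment trick (illustrative only, not part
    # of the algorithm): a function whose environment is re-bound can read and
    # write the caller's locals via `<<-`, e.g.
    #     f <- function() x <<- x + 1
    #     g <- function() { x <- 0; environment(f) <- environment(); f(); x }
    #     g() # returns 1
    # This is presumably how `grad(..., loss.out = TRUE)` writes `loss` into
    # this scope below.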
    # Get dimensions.
    n <- nrow(X) # Number of samples.
    p <- ncol(X) # Data dimension.
    q <- p - k # Complement dimension of the SDR space.
    # Save initial learning rate `tau`.
    tau.init <- tau
    # Adapt tolerance for the break condition.
    tol <- sqrt(2 * q) * tol
    # Estimate the bandwidth if not given.
    if (missing(h) || !is.numeric(h)) {
        h <- estimate.bandwidth(X, k, nObs)
    }
    # Compute persistent data.
    # Compute lookup indexes for symmetry, lower/upper
    # triangular parts and vectorization.
    pair.index <- elem.pairs(seq(n))
    i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
    j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
    # Indexes of the vectorized matrix, for the lower and upper triangular parts.
    lower <- ((i - 1) * n) + j
    upper <- ((j - 1) * n) + i
    # Create all pairwise differences of rows of `X`.
    X_diff <- X[i, , drop = FALSE] - X[j, , drop = FALSE]
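    # Worked example (assuming `elem.pairs` returns all pairs `i < j` as a
    # `2 x choose(n, 2)` matrix): for `n = 3` the pairs are (1,2), (1,3),
    # (2,3), hence
    #     i     = c(1, 1, 2),   j     = c(2, 3, 3)
    #     lower = c(2, 3, 6),   upper = c(4, 7, 8)
    # i.e. `lower`/`upper` are the column-major linear indexes of the strictly
    # lower/upper triangular entries of an `n x n` matrix.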
    # Identity matrix.
    I_p <- diag(1, p)
    # Init tracking of the current best result (across multiple attempts).
    V.best <- NULL
    loss.best <- Inf
    # Start loop for multiple attempts.
    for (attempt in 1:attempts) {
        # Reset learning rate `tau`.
        tau <- tau.init
        # Sample a `(p, q)` dimensional matrix from the Stiefel manifold as
        # optimization start value.
        V <- rStiefl(p, q)
        # Initial loss and gradient.
        loss <- Inf
        G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
        # Set last loss (aka, loss after applying the step).
        loss.last <- loss
        # Cayley transform matrix `A`.
        A <- (G %*% t(V)) - (V %*% t(G))
        A.last <- A
        W <- -A
        Z <- W %*% V
        # Compute the directional derivative.
        loss.prime <- sum(G * Z) # Tr(G^T Z)
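        # Note (a property worth recording, not in the original comments):
        # `A = G V^T - V G^T` is skew-symmetric, so with `V^T V = I` the
        # direction `Z = -A V` satisfies `V^T Z = -(V^T G - G^T V)`, which is
        # skew-symmetric as well; hence `Z` lies in the tangent space of the
        # Stiefel manifold at `V`, and `loss.prime = Tr(G^T Z)` is the
        # derivative of the loss along `Z`.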
        # Call logger with initial values before starting optimization.
        if (is.function(logger)) {
            epoch <- 0 # Set epoch count to 0 (only relevant for logging).
            error <- NA
            logger(environment())
        }
        ## Start optimization loop.
        for (epoch in 1:epochs) {
            # New directional derivative.
            loss.prime <- sum(G * Z)
            # Reset `tau` for step-size selection.
            tau <- tau.init
            for (iter in 1:max.linesearch.iter) {
                V.tau <- retractStiefl(V + tau * Z)
                # Loss at the position after a step.
                loss <- grad(X, Y, V.tau, h,
                             loss.only = TRUE, persistent = TRUE)
                # Check the Armijo condition.
                if (loss <= loss.last + (rho * tau * loss.prime)) {
                    break # If fulfilled, stop the line search.
                }
                # Reduce the step-size and continue the line search.
                tau <- tau / 2
            }
            # Compute the error.
            error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
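            # The loop above is a backtracking line search with the Armijo
            # (sufficient decrease) condition
            #     L(R(V + tau * Z)) <= L(V) + rho * tau * Tr(G^T Z),
            # where `R` is (as the name `retractStiefl` suggests) a retraction
            # onto the Stiefel manifold. The error is the Frobenius distance
            # between the projections `V V^T` and `V.tau V.tau^T`, measuring
            # the change of the spanned subspace rather than of the
            # (basis-dependent) matrix `V`.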
            # Perform the step with the found step-size.
            V <- V.tau
            loss.last <- loss
            # Call logger.
            if (is.function(logger)) {
                logger(environment())
            }
            # Check the break condition.
            # Note: the division by `sqrt(2 * q)` is included in `tol`.
            if (error < tol) {
                break
            }
            # Compute the gradient at the new position.
            G <- grad(X, Y, V, h, persistent = TRUE)
            # Store the last `A` for the `beta` computation.
            A.last <- A
            # Cayley transform matrix `A`.
            A <- (G %*% t(V)) - (V %*% t(G))
            # Check the second break condition.
            if (norm(A, type = 'F') < tol) {
                break
            }
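            # Note: with `V^T V = I` one has `A V = G - V G^T V`, which is
            # the Riemannian gradient of the loss at `V` under the canonical
            # metric, so a small `norm(A)` indicates an (almost) stationary
            # point.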
            # New directional derivative.
            loss.prime <- sum(G * Z)
            # Reset `beta` if needed (restart if `Z` is no longer a descent
            # direction).
            if (loss.prime < 0) {
                # Compute `beta` as described in the referenced paper.
                beta.FR <- (norm(A, type = 'F') / norm(A.last, type = 'F'))^2
                beta.PR <- sum(A * (A - A.last)) / norm(A.last, type = 'F')^2
                if (beta.PR < -beta.FR) {
                    beta <- -beta.FR
                } else if (abs(beta.PR) < beta.FR) {
                    beta <- beta.PR
                } else if (beta.PR > beta.FR) {
                    beta <- beta.FR
                } else {
                    beta <- 0
                }
            } else {
                beta <- 0
            }
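            # The branching above clips the Polak-Ribiere coefficient
            # `beta.PR` to the interval `[-beta.FR, beta.FR]` given by the
            # Fletcher-Reeves coefficient, a standard hybrid CG rule;
            # `beta = 0` amounts to a restart with a pure steepest descent
            # step.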
            # Update the search direction.
            W <- -A + beta * W
            Z <- W %*% V
        }
        # Check if the current attempt improved on the previous ones.
        if (loss < loss.best) {
            loss.best <- loss
            V.best <- V
        }
    }
    return(list(
        loss = loss.best,
        V = V.best,
        B = null(V.best), # Orthogonal complement of `V.best`, the SDR basis.
        h = h
    ))
}
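
## A minimal usage sketch (illustrative only; it assumes the package's
## internal helpers `grad`, `rStiefl`, `retractStiefl`, `estimate.bandwidth`,
## `elem.pairs` and `null` are available, e.g. when called from within the
## CVE package namespace):
##     set.seed(1)
##     n <- 200; p <- 4; k <- 1
##     X <- matrix(rnorm(n * p), n, p)
##     Y <- X[, 1] + 0.25 * rnorm(n) # Y depends on a 1-dim. projection of X.
##     fit <- cve_rcg(X, Y, k)
##     fit$B    # Estimated `(p, k)` basis of the SDR subspace.
##     fit$loss # Loss of the best attempt.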