2
0
Fork 0
CVE/CVE_R/R/cve_simple.R

117 lines
3.4 KiB
R

#' Simple implementation of the CVE method. 'Simple' means that this method is
#' a classic GD method unsing no further tricks.
#'
#' @keywords internal
#' @export
cve_simple <- function(X, Y, k,
nObs = sqrt(nrow(X)),
h = NULL,
tau = 1.0,
tol = 1e-3,
slack = 0,
epochs = 50L,
attempts = 10L
) {
# Set `grad` functions environment to enable if to find this environments
# local variabels, needed to enable the manipulation of this local variables
# from within `grad`.
environment(grad) <- environment()
# Setup loss histroy.
loss.history <- matrix(NA, epochs, attempts);
# Get dimensions.
n <- nrow(X)
p <- ncol(X)
q <- p - k
# Save initial learning rate `tau`.
tau.init <- tau
# Addapt tolearance for break condition.
tol <- sqrt(2 * q) * tol
# Estaimate bandwidth if not given.
if (missing(h) | !is.numeric(h)) {
h <- estimate.bandwidth(X, k, nObs)
}
I_p <- diag(1, p)
# Init tracking of current best (according multiple attempts).
V.best <- NULL
loss.best <- Inf
# Start loop for multiple attempts.
for (attempt in 1:attempts) {
# reset step width `tau`
tau <- tau.init
# Sample a `(p, q)` dimensional matrix from the stiefel manifold as
# optimization start value.
V <- rStiefl(p, q)
# Initial loss and gradient.
loss <- Inf
G <- grad(X, Y, V, h, loss.out = TRUE) # `loss.out=T` sets `loss`!
# Set last loss (aka, loss after applying the step).
loss.last <- loss
# Cayley transform matrix `A`
A <- (G %*% t(V)) - (V %*% t(G))
## Start optimization loop.
for (epoch in 1:epochs) {
# Apply learning rate `tau`.
A.tau <- tau * A
# Parallet transport (on Stiefl manifold) into direction of `G`.
V.tau <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)
# Loss at position after a step.
loss <- grad(X, Y, V.tau, h, loss.only = TRUE)
# Check if step is appropriate
if ((loss - loss.last) > slack * loss.last) {
tau <- tau / 2
next() # Keep position and try with smaller `tau`.
}
# Compute error.
error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
# Check break condition (epoch check to skip ignored gradient calc).
# Note: the devision by `sqrt(2 * k)` is included in `tol`.
if (error < tol | epoch >= epochs) {
# take last step and stop optimization.
V <- V.tau
break()
}
# Perform the step and remember previous loss.
V <- V.tau
loss.last <- loss
# Compute gradient at new position.
# Note: `loss` will be updated too!
G <- grad(X, Y, V, h, loss.out = TRUE, loss.log = TRUE)
# Cayley transform matrix `A`
A <- (G %*% t(V)) - (V %*% t(G))
}
# Check if current attempt improved previous ones
if (loss < loss.best) {
loss.best <- loss
V.best <- V
}
}
return(list(
loss.history = loss.history,
loss = loss.best,
V = V.best,
B = null(V.best),
h = h
))
}