2019-09-02 19:07:56 +00:00
|
|
|
#' Implementation of the CVE method using curvilinear linesearch with
#' Armijo-Wolfe conditions.
#'
#' Starting from `attempts` random points on the Stiefel manifold, each
#' attempt performs up to `epochs` steps along the Cayley-transform curve
#' \eqn{V(\tau) = (I + \tau/2 A)^{-1} (I - \tau/2 A) V} with
#' \eqn{A = G V^T - V G^T}, where the step size `tau` of every step is chosen
#' by a bisection/doubling linesearch on the Armijo and Wolfe conditions.
#' The attempt with the smallest final loss is returned.
#'
#' @param X matrix whose rows are used to build all pairwise row differences;
#'  `nrow(X)` and `ncol(X)` define the dimensions `n` and `p`.
#' @param Y passed unchanged to `grad` (presumably the responses belonging to
#'  the rows of `X` -- TODO confirm against `grad`).
#' @param k dimension parameter; the optimization variable `V` is a
#'  `p x (p - k)` matrix.
#' @param nObs forwarded to `estimate.bandwidth` when `h` is not supplied.
#' @param h bandwidth passed to `grad`. If missing or not numeric it is
#'  computed as `estimate.bandwidth(X, k, nObs)`.
#' @param tau initial step size every linesearch starts from.
#' @param tol break tolerance. Internally scaled by `sqrt(2 * (p - k))` and
#'  compared against the Frobenius distance between the projections
#'  `V V^T` before and after a step.
#' @param rho1 constant of the Armijo (sufficient decrease) condition.
#' @param rho2 constant of the Wolfe (curvature) condition.
#' @param slack not referenced in the body of this function.
#'  NOTE(review): `grad` shares this function's environment and might read
#'  it -- confirm before removing.
#' @param epochs maximum number of optimization steps per attempt.
#' @param attempts number of random starting values (at least 1).
#' @param max.linesearch.iter maximum number of step size updates per
#'  linesearch (at least 1).
#' @param logger optional function; if a function is given it is called with
#'  this function's environment before the first step and after every step.
#'
#' @return A list with elements `loss` (best final loss over all attempts),
#'  `V` (the corresponding matrix), `B` (`null(V)`) and `h` (the bandwidth
#'  used).
#'
#' @keywords internal
#' @export
cve_linesearch <- function(X, Y, k,
                           nObs = sqrt(nrow(X)),
                           h = NULL,
                           tau = 1.0,
                           tol = 1e-3,
                           rho1 = 0.1,
                           rho2 = 0.9,
                           slack = 0,
                           epochs = 50L,
                           attempts = 10L,
                           max.linesearch.iter = 10L,
                           logger = NULL) {
  # Without at least one attempt there is no result to return and without at
  # least one linesearch iteration there is no candidate step.
  if (attempts < 1L || max.linesearch.iter < 1L) {
    stop("`attempts` and `max.linesearch.iter` must be at least 1.",
         call. = FALSE)
  }

  # Set the environment of `grad` to this function's environment. This lets
  # `grad` find (and manipulate) the local variables defined below, so the
  # local variable names in this function must NOT be renamed.
  environment(grad) <- environment()

  # Get dimensions.
  n <- nrow(X)
  p <- ncol(X)
  q <- p - k

  # Save initial learning rate `tau`.
  tau.init <- tau
  # Adapt tolerance for break condition.
  tol <- sqrt(2 * q) * tol

  # Estimate bandwidth if not given.
  if (missing(h) || !is.numeric(h)) {
    h <- estimate.bandwidth(X, k, nObs)
  }

  # Compute persistent data.
  # Compute lookup indexes for symmetry, lower/upper
  # triangular parts and vectorization.
  pair.index <- elem.pairs(seq(n))
  i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
  j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
  # Matrix of vectorized indices. (vec(index) -> seq)
  index <- matrix(seq(n * n), n, n)
  lower <- index[lower.tri(index)]
  upper <- t(index)[lower]

  # Create all pairwise differences of rows of `X`.
  X_diff <- X[i, , drop = FALSE] - X[j, , drop = FALSE]
  # Identity matrix.
  I_p <- diag(1, p)

  # Init tracking of current best (according to multiple attempts).
  V.best <- NULL
  loss.best <- Inf

  # Start loop for multiple attempts.
  for (attempt in seq_len(attempts)) {

    # Sample a `(p, q)` dimensional matrix from the Stiefel manifold as
    # optimization start value.
    V <- rStiefl(p, q)

    # Initial loss and gradient. `loss` is only initialized here; the call
    # to `grad` is relied upon to overwrite it (see `environment(grad)`).
    loss <- Inf
    G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
    # Set last loss (aka, loss after applying the step).
    loss.last <- loss

    # Call logger with initial values before starting optimization.
    if (is.function(logger)) {
      epoch <- 0 # Set epoch count to 0 (only relevant for logging).
      error <- NA
      logger(environment())
    }

    ## Start optimization loop.
    for (epoch in seq_len(epochs)) {

      # Cayley transform matrix `A` (skew-symmetric by construction).
      A <- (G %*% t(V)) - (V %*% t(G))

      # Directional derivative of the loss at current position, given
      # as `Tr(G^T \cdot A \cdot V)`.
      loss.prime <- -0.5 * norm(A, type = "F")^2

      # Linesearch: bracket the step size in `[tau.lower, tau.upper]`.
      tau.upper <- Inf
      tau.lower <- 0
      tau <- tau.init
      for (iter in seq_len(max.linesearch.iter)) {
        # Apply learning rate `tau`.
        A.tau <- (tau / 2) * A
        # Move along the Cayley-transform curve (stays on the Stiefel
        # manifold) into the direction given by `G`.
        inv <- solve(I_p + A.tau)
        V.tau <- inv %*% ((I_p - A.tau) %*% V)

        # Loss at position after a step.
        loss <- Inf # aka loss.tau
        G.tau <- grad(X, Y, V.tau, h, loss.out = TRUE, persistent = TRUE)

        # Armijo condition violated: step too long, shrink the bracket
        # from above and bisect.
        if (loss > loss.last + (rho1 * tau * loss.prime)) {
          tau.upper <- tau
          tau <- (tau.lower + tau.upper) / 2
          next
        }

        # Derivative of the curve and of the loss along the curve at
        # `tau`, the latter given as `Tr(grad(tau)^T \cdot Y'(tau))`.
        # The gradient must be the one at the trial point `V.tau`, i.e.
        # `G.tau`, not the gradient `G` at the current point.
        V.prime.tau <- -0.5 * inv %*% A %*% (V + V.tau)
        loss.prime.tau <- sum(G.tau * V.prime.tau)

        # Wolfe condition violated: step too short, raise the lower
        # bound and either double (no upper bound yet) or bisect.
        if (loss.prime.tau < rho2 * loss.prime) {
          tau.lower <- tau
          if (is.infinite(tau.upper)) {
            tau <- 2 * tau.lower
          } else {
            tau <- (tau.lower + tau.upper) / 2
          }
        } else {
          # Both conditions hold, accept the current step.
          break
        }
      }

      # Compute error (distance between the projections before and after
      # the step).
      error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")

      # Check break condition (epoch check to skip ignored gradient calc).
      # Note: the division by `sqrt(2 * q)` is included in `tol`.
      if (error < tol || epoch >= epochs) {
        # Take last step and stop optimization.
        V <- V.tau
        # Final call to the logger before stopping optimization.
        if (is.function(logger)) {
          G <- G.tau
          logger(environment())
        }
        break
      }

      # Perform the step and remember previous loss.
      V <- V.tau
      loss.last <- loss
      G <- G.tau

      # Log after taking current step.
      if (is.function(logger)) {
        logger(environment())
      }
    }

    # Check if current attempt improved previous ones.
    if (loss < loss.best) {
      loss.best <- loss
      V.best <- V
    }
  }

  list(
    loss = loss.best,
    V = V.best,
    B = null(V.best),
    h = h
  )
}
|