#' Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe #' conditions. #' #' @keywords internal #' @export cve_linesearch <- function(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 1.0, tol = 1e-3, rho1 = 0.1, rho2 = 0.9, slack = 0, epochs = 50L, attempts = 10L, max.linesearch.iter = 10L, logger = NULL ) { # Set `grad` functions environment to enable if to find this environments # local variabels, needed to enable the manipulation of this local variables # from within `grad`. environment(grad) <- environment() # Get dimensions. n <- nrow(X) p <- ncol(X) q <- p - k # Save initial learning rate `tau`. tau.init <- tau # Addapt tolearance for break condition. tol <- sqrt(2 * q) * tol # Estaimate bandwidth if not given. if (missing(h) | !is.numeric(h)) { h <- estimate.bandwidth(X, k, nObs) } # Compute persistent data. # Compute lookup indexes for symmetrie, lower/upper # triangular parts and vectorization. pair.index <- elem.pairs(seq(n)) i <- pair.index[1, ] # `i` indices of `(i, j)` pairs j <- pair.index[2, ] # `j` indices of `(i, j)` pairs # Matrix of vectorized indices. (vec(index) -> seq) index <- matrix(seq(n * n), n, n) lower <- index[lower.tri(index)] upper <- t(index)[lower] # Create all pairewise differences of rows of `X`. X_diff <- X[i, , drop = F] - X[j, , drop = F] # Identity matrix. I_p <- diag(1, p) # Init tracking of current best (according multiple attempts). V.best <- NULL loss.best <- Inf # Start loop for multiple attempts. for (attempt in 1:attempts) { # Sample a `(p, q)` dimensional matrix from the stiefel manifold as # optimization start value. V <- rStiefl(p, q) # Initial loss and gradient. loss <- Inf G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE) # Set last loss (aka, loss after applying the step). loss.last <- loss # Call logger with initial values before starting optimization. if (is.function(logger)) { epoch <- 0 # Set epoch count to 0 (only relevant for logging). error <- NA logger(environment()) } ## Start optimization loop. for (epoch in 1:epochs) { # Cayley transform matrix `A` A <- (G %*% t(V)) - (V %*% t(G)) # Directional derivative of the loss at current position, given # as `Tr(G^T \cdot A \cdot V)`. loss.prime <- -0.5 * norm(A, type = 'F')^2 # Linesearch tau.upper <- Inf tau.lower <- 0 tau <- tau.init for (iter in 1:max.linesearch.iter) { # Apply learning rate `tau`. A.tau <- (tau / 2) * A # Parallet transport (on Stiefl manifold) into direction of `G`. inv <- solve(I_p + A.tau) V.tau <- inv %*% ((I_p - A.tau) %*% V) # Loss at position after a step. loss <- Inf # aka loss.tau G.tau <- grad(X, Y, V.tau, h, loss.out = TRUE, persistent = TRUE) # Armijo condition. if (loss > loss.last + (rho1 * tau * loss.prime)) { tau.upper <- tau tau <- (tau.lower + tau.upper) / 2 next() } V.prime.tau <- -0.5 * inv %*% A %*% (V + V.tau) loss.prime.tau <- sum(G * V.prime.tau) # Tr(grad(tau)^T \cdot Y^'(tau)) # Wolfe condition. if (loss.prime.tau < rho2 * loss.prime) { tau.lower <- tau if (tau.upper == Inf) { tau <- 2 * tau.lower } else { tau <- (tau.lower + tau.upper) / 2 } } else { break() } } # Compute error. error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F") # Check break condition (epoch check to skip ignored gradient calc). # Note: the devision by `sqrt(2 * k)` is included in `tol`. if (error < tol | epoch >= epochs) { # take last step and stop optimization. V <- V.tau # Final call to the logger before stopping optimization if (is.function(logger)) { G <- G.tau logger(environment()) } break() } # Perform the step and remember previous loss. V <- V.tau loss.last <- loss G <- G.tau # Log after taking current step. if (is.function(logger)) { logger(environment()) } } # Check if current attempt improved previous ones if (loss < loss.best) { loss.best <- loss V.best <- V } } return(list( loss = loss.best, V = V.best, B = null(V.best), h = h )) }