#' Simple implementation of the CVE method. 'Simple' means that this method is #' a classic GD method unsing no further tricks. #' #' @keywords internal #' @export cve_simple <- function(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 1.0, tol = 1e-3, slack = 0, epochs = 50L, attempts = 10L ) { # Set `grad` functions environment to enable if to find this environments # local variabels, needed to enable the manipulation of this local variables # from within `grad`. environment(grad) <- environment() # Setup loss histroy. loss.history <- matrix(NA, epochs, attempts); # Get dimensions. n <- nrow(X) p <- ncol(X) q <- p - k # Save initial learning rate `tau`. tau.init <- tau # Addapt tolearance for break condition. tol <- sqrt(2 * q) * tol # Estaimate bandwidth if not given. if (missing(h) | !is.numeric(h)) { h <- estimate.bandwidth(X, k, nObs) } I_p <- diag(1, p) # Init tracking of current best (according multiple attempts). V.best <- NULL loss.best <- Inf # Start loop for multiple attempts. for (attempt in 1:attempts) { # reset step width `tau` tau <- tau.init # Sample a `(p, q)` dimensional matrix from the stiefel manifold as # optimization start value. V <- rStiefl(p, q) # Initial loss and gradient. loss <- Inf G <- grad(X, Y, V, h, loss.out = TRUE) # `loss.out=T` sets `loss`! # Set last loss (aka, loss after applying the step). loss.last <- loss # Cayley transform matrix `A` A <- (G %*% t(V)) - (V %*% t(G)) ## Start optimization loop. for (epoch in 1:epochs) { # Apply learning rate `tau`. A.tau <- tau * A # Parallet transport (on Stiefl manifold) into direction of `G`. V.tau <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V) # Loss at position after a step. loss <- grad(X, Y, V.tau, h, loss.only = TRUE) # Check if step is appropriate if ((loss - loss.last) > slack * loss.last) { tau <- tau / 2 next() # Keep position and try with smaller `tau`. } # Compute error. error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F") # Check break condition (epoch check to skip ignored gradient calc). # Note: the devision by `sqrt(2 * k)` is included in `tol`. if (error < tol | epoch >= epochs) { # take last step and stop optimization. V <- V.tau break() } # Perform the step and remember previous loss. V <- V.tau loss.last <- loss # Compute gradient at new position. # Note: `loss` will be updated too! G <- grad(X, Y, V, h, loss.out = TRUE, loss.log = TRUE) # Cayley transform matrix `A` A <- (G %*% t(V)) - (V %*% t(G)) } # Check if current attempt improved previous ones if (loss < loss.best) { loss.best <- loss V.best <- V } } return(list( loss.history = loss.history, loss = loss.best, V = V.best, B = null(V.best), h = h )) }