#' Simple implementation of the CVE method. 'Simple' means that this is a
#' classic gradient descent (GD) method using no further tricks.
#'
#' @keywords internal
#' @export
cve_simple <- function(X, Y, k,
                       nObs = sqrt(nrow(X)),
                       h = NULL,
                       tau = 1.0,
                       tol = 1e-3,
                       slack = 0,
                       epochs = 50L,
                       attempts = 10L
) {
    # Adapt tolerance for the break condition.
    tol <- sqrt(2 * k) * tol
    tau.init <- tau # remember initial value to reset for each new attempt

    # Get dimensions.
    n <- nrow(X)
    p <- ncol(X)
    q <- p - k

    # Estimate bandwidth if not given.
    if (missing(h) || !is.numeric(h)) {
        h <- estimate.bandwidth(X, k, nObs)
    }

    # Compute all static data.
    X_diff <- row.pair.apply(X, `-`)
    index <- matrix(seq(n * n), n, n)
    tri.i <- row.pair.apply(index[, 1, drop = FALSE], function(i, j) { i })
    tri.j <- row.pair.apply(index[, 1, drop = FALSE], function(i, j) { j })
    lower.tri.ind <- index[lower.tri(index)]
    upper.tri.ind <- t(index)[lower.tri.ind] # ATTENTION: correct order
    I_p <- diag(1, p)

    # Init variables tracking the best attempt.
    loss.best <- Inf
    V.best <- NULL

    # Run a few attempts with different starting values.
    for (attempt in 1:attempts) {
        # Reset step width `tau`.
        tau <- tau.init

        # Sample a `(p, q)` dimensional matrix from the Stiefel manifold as
        # optimization start value.
        V <- rStiefl(p, q)

        ## Initial loss calculation.
        # Projection onto the orthogonal complement of `span(V)`.
        Q <- I_p - (V %*% t(V))
        # Compute vectorized distance matrix `D`.
        vecD <- rowSums((X_diff %*% Q)^2)
        # Compute weights matrix `W`.
        W <- matrix(1, n, n) # init (`exp(0) = 1` on the diagonal)
        W[lower.tri.ind] <- exp(vecD / (-2 * h)) # set lower triangular part
        W[upper.tri.ind] <- t(W)[upper.tri.ind] # mirror to upper triangular part
        W <- sweep(W, 2, colSums(W), FUN = `/`) # normalize columns
        # Weighted moments of `Y`.
        y1 <- Y %*% W # `Y` is 1D anyway, avoid transposing `W`
        y2 <- Y^2 %*% W
        # Get per sample loss `L(V, X_i)`.
        L <- y2 - y1^2
        # Average to the total loss `L(V)`.
        loss <- mean(L)

        ## Start optimization loop.
        for (iter in 1:epochs) {
            # `tri.i`/`tri.j` index the pairs of a lower triangular matrix
            # stored in column-major order by only their `i` or `j` component.
            # vecW <- lower.tri.vector(W) + upper.tri.vector(W)
            vecW <- W[lower.tri.ind] + W[upper.tri.ind]
            S <- (L[tri.j] - (Y[tri.i] - y1[tri.j])^2) * vecW * vecD

            # Gradient.
            G <- t(X_diff) %*% sweep(X_diff %*% V, 1, S, `*`)
            G <- (-2 / (n * h^2)) * G
            # Cayley transform matrix `A`.
            A <- (G %*% t(V)) - (V %*% t(G))

            # Compute the next `V` for step size `tau` using the Cayley
            # transform, a retraction along the gradient direction that stays
            # on the Stiefel manifold.
            A.tau <- tau * A
            V.tau <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)

            # Projection onto the orthogonal complement of `span(V.tau)`.
            Q <- I_p - (V.tau %*% t(V.tau))
            # Compute vectorized distance matrix `D`.
            vecD <- rowSums((X_diff %*% Q)^2)
            # Compute weights matrix `W` (only update values, diag keeps 1's).
            W[lower.tri.ind] <- exp(vecD / (-2 * h)) # set lower triangular part
            W[upper.tri.ind] <- t(W)[upper.tri.ind] # mirror to upper triangular part
            W <- sweep(W, 2, colSums(W), FUN = `/`) # normalize columns
            # Weighted moments of `Y`.
            y1 <- Y %*% W # `Y` is 1D anyway, avoid transposing `W`
            y2 <- Y^2 %*% W
            # Get per sample loss `L(V, X_i)`.
            L <- y2 - y1^2
            # Average to the total loss `L(V)`.
            loss.tau <- mean(L)

            # Check if the step is appropriate: if the loss increased by more
            # than `slack` allows, halve the step size, otherwise accept.
            if (loss != Inf && loss.tau - loss > slack * loss) {
                tau <- tau / 2
            } else {
                loss <- loss.tau
                V <- V.tau
            }
        }

        # Check if the current attempt improved on previous ones. Compare the
        # accepted `loss`/`V`, since the last `V.tau` may have been rejected.
        if (loss < loss.best) {
            loss.best <- loss
            V.best <- V
        }
    }

    return(list(
        loss = loss.best,
        V = V.best,
        B = null(V.best),
        h = h
    ))
}
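
# Usage sketch (illustrative only, not part of the package): the data
# generating model below is an assumption for demonstration. It presumes the
# package's internal helpers (`estimate.bandwidth`, `row.pair.apply`,
# `rStiefl`, `null`) are available, e.g. via `devtools::load_all()`, and that
# `Y` depends on `X` only through the single direction `b`.
#
#   set.seed(21)
#   n <- 100; p <- 6; k <- 1
#   b <- c(1, 1, 1, 0, 0, 0) / sqrt(3)             # assumed true direction
#   X <- matrix(rnorm(n * p), n, p)
#   Y <- drop((X %*% b)^2 + rnorm(n, sd = 0.25))   # keep `Y` as a plain vector
#   fit <- cve_simple(X, Y, k)
#   fit$B    # estimated basis of the k-dimensional reduction, compare to `b`
#   fit$loss # loss of the best attempt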