#' Simple implementation of the CVE method. 'Simple' means that this method is
#' a classic GD method using no further tricks.
#'
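#' @param X data matrix with samples in its rows.
#' @param Y numeric response vector of length `nrow(X)`.
#' @param k assumed dimension of the reduction (the returned `B` has `k` columns).
#' @param nObs parameter forwarded to `estimate.bandwidth`.
#' @param h bandwidth; estimated via `estimate.bandwidth` if not supplied.
#' @param tau initial step size of the gradient descent.
#' @param tol tolerance for the break condition, internally scaled by `sqrt(2 * k)`.
#' @param slack a step may increase the loss by up to `slack * loss` and still be
#'   accepted.
#' @param epochs number of gradient steps per attempt.
#' @param attempts number of random restarts on the Stiefel manifold.
#'
#' @return a list with the attained loss, the optimal `V`, its null space basis
#'   `B` (the estimated reduction) and the bandwidth `h`.
#'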
#' @keywords internal
#' @export
cve_simple <- function(X, Y, k,
                       nObs = sqrt(nrow(X)),
                       h = NULL,
                       tau = 1.0,
                       tol = 1e-3,
                       slack = 0,
                       epochs = 50L,
                       attempts = 10L
                       ) {
    # Adapt tolerance for break condition
    tol <- sqrt(2 * k) * tol
    tau.init <- tau # remember to reset for new attempt

    # Get dimensions.
    n <- nrow(X)
    p <- ncol(X)
    q <- p - k
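    # Note: `V` below is a `p x q` matrix whose span is removed by the projection;
    # the returned reduction `B = null(V)` is its `k`-dimensional orthogonal
    # complement.
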
    # Estimate bandwidth if not given.
    if (missing(h) || !is.numeric(h)) {
        h <- estimate.bandwidth(X, k, nObs)
    }

    # Compute all static data.
    X_diff <- row.pair.apply(X, `-`)
    index <- matrix(seq(n * n), n, n)
    tri.i <- row.pair.apply(index[, 1, drop = FALSE], function(i, j) { i })
    tri.j <- row.pair.apply(index[, 1, drop = FALSE], function(i, j) { j })
    lower.tri.ind <- index[lower.tri(index)]
    upper.tri.ind <- t(index)[lower.tri.ind] # ATTENTION: correct order
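    # Note: `X_diff` stacks the pairwise row differences of `X` (one pair per row,
    # as produced by `row.pair.apply`), `tri.i` / `tri.j` hold the sample indices
    # of each pair, and `lower.tri.ind` / `upper.tri.ind` map the pairs to the
    # `(i, j)` and `(j, i)` entries of an `n x n` matrix in column-major storage.
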
    I_p <- diag(1, p)

    # Init variables for best attempt
    loss.best <- Inf
    V.best <- NULL

    # Take a few attempts with different starting values
    for (attempt in 1:attempts) {

        # reset step width `tau`
        tau <- tau.init

        # Sample a `(p, q)` dimensional matrix from the Stiefel manifold as
        # optimization start value.
        V <- rStiefl(p, q)

        ## Initial loss calculation
        # Projection onto the orthogonal complement of `span(V)`.
        Q <- I_p - (V %*% t(V))

        # Compute vectorized distance matrix `D`.
        vecD <- rowSums((X_diff %*% Q)^2)
        # Compute weights matrix `W`
        W <- matrix(1, n, n) # init (`exp(0) = 1` in the diagonal)
        W[lower.tri.ind] <- exp(vecD / (-2 * h)) # set lower triangular part
        W[upper.tri.ind] <- t(W)[upper.tri.ind] # mirror to upper triangular part
        W <- sweep(W, 2, colSums(W), FUN = `/`) # normalize columns
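        # Note: the unnormalized weights are Gaussian kernel values of the
        # projected pairwise distances, `w_ij = exp(-||Q (x_i - x_j)||^2 / (2 * h))`,
        # so after the column normalization, `W[, j]` holds the relative weights of
        # all samples with respect to sample `j`.
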
        # Weighted `Y` moments
        y1 <- Y %*% W # is 1D anyway, avoid transposing `W`
        y2 <- Y^2 %*% W

        # Get per sample loss `L(V, X_i)`
        L <- y2 - y1^2
        # Average to the total loss `L(V)`
        loss <- mean(L)
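        # Note: `y1[j]` and `y2[j]` are kernel-weighted estimates of `E[Y]` and
        # `E[Y^2]` near sample `j` in the projected space, so `L[j] = y2[j] - y1[j]^2`
        # estimates the conditional variance of `Y` around sample `j`; the CVE
        # objective is the average of these variances.
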
        ## Start optimization loop.
        for (iter in 1:epochs) {

            # index according to a lower triangular matrix stored in column major
            # order by only the `i` or `j` index.
            # vecW <- lower.tri.vector(W) + upper.tri.vector(W)
            vecW <- W[lower.tri.ind] + W[upper.tri.ind]
            S <- (L[tri.j] - (Y[tri.i] - y1[tri.j])^2) * vecW * vecD
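            # Each entry of `S` corresponds to one sample pair `(i, j)`: the local
            # variance `L[j]` minus the squared residual of `Y[i]` about the local
            # mean `y1[j]`, scaled by the symmetrized kernel weight and the
            # projected squared distance of the pair. It feeds the gradient below.
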
            # Gradient
            G <- t(X_diff) %*% sweep(X_diff %*% V, 1, S, `*`)
            G <- (-2 / (n * h^2)) * G
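            # Equivalently, `G = (-2 / (n * h^2)) * t(X_diff) %*% diag(S) %*% X_diff %*% V`;
            # the `sweep()` call scales the rows of `X_diff %*% V` by `S` without
            # forming the diagonal matrix explicitly.
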
            # Cayley transform matrix `A`
            A <- (G %*% t(V)) - (V %*% t(G))

            # Compute next `V` with step size `tau` using the Cayley transform
            # via a parallel transport into the gradient direction.
            A.tau <- tau * A
            V.tau <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)
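            # Because `A` is skew-symmetric, `(I + tau * A)^-1 (I - tau * A)` is an
            # orthogonal matrix (a Cayley transform), so `V.tau` keeps orthonormal
            # columns and remains on the Stiefel manifold.
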
            # Projection onto the orthogonal complement of `span(V.tau)`.
            Q <- I_p - (V.tau %*% t(V.tau))

            # Compute vectorized distance matrix `D`.
            vecD <- rowSums((X_diff %*% Q)^2)
            # Compute weights matrix `W` (only update values, diag keeps 1's)
            W[lower.tri.ind] <- exp(vecD / (-2 * h)) # set lower triangular part
            W[upper.tri.ind] <- t(W)[upper.tri.ind] # mirror to upper triangular part
            W <- sweep(W, 2, colSums(W), FUN = `/`) # normalize columns

            # Weighted `Y` moments
            y1 <- Y %*% W # is 1D anyway, avoid transposing `W`
            y2 <- Y^2 %*% W

            # Get per sample loss `L(V, X_i)`
            L <- y2 - y1^2
            # Average to the total loss `L(V)`
            loss.tau <- mean(L)

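            # Backtracking step control: a step that increases the loss by more
            # than the allowed `slack` fraction is rejected (the step size `tau`
            # is halved and the previous `V` is kept), otherwise the proposed
            # `V.tau` is accepted.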
            # Check if step is appropriate
            if (loss != Inf && loss.tau - loss > slack * loss) {
                tau <- tau / 2
            } else {
                loss <- loss.tau
                V <- V.tau
            }
        }

        # Check if current attempt improved previous ones
        # (compare the last accepted pair `loss` / `V`; the final proposal
        # `V.tau` may have been rejected by the step control above)
        if (loss < loss.best) {
            loss.best <- loss
            V.best <- V
        }

    }

    return(list(
        loss = loss.best,
        V = V.best,
        B = null(V.best),
        h = h
    ))
}
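
# Minimal usage sketch (not run), assuming the package helpers referenced above
# (`estimate.bandwidth`, `row.pair.apply`, `rStiefl`, `null`) are available:
#
#   set.seed(21)
#   X <- matrix(rnorm(100 * 5), 100, 5)
#   Y <- X[, 1]^2 + 0.1 * rnorm(100)
#   fit <- cve_simple(X, Y, k = 1)
#   fit$B    # basis of the estimated k-dimensional reduction (null space of `V`)
#   fit$loss # attained CVE loss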