#' Simple implementation of the CVE method. 'Simple' means that this method is
#' a classic stochastic gradient descent (SGD) method using no further tricks.
#'
#' @keywords internal
#' @export
cve_sgd <- function(X, Y, k,
                    nObs = sqrt(nrow(X)),
                    h = NULL,
                    tau = 0.01,
                    epochs = 50L,
                    batch.size = 16L,
                    attempts = 10L
) {
    # Get dimensions.
    n <- nrow(X) # Number of samples.
    p <- ncol(X) # Data dimension.
    q <- p - k   # Complement dimension of the SDR space.

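    # Note: the optimization below works on `V`, a basis of the q-dimensional
    # orthogonal complement of the SDR space; the k-dimensional SDR basis is
    # recovered at the end as `B = null(V.best)`.
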
    # Save initial learning rate `tau`.
    tau.init <- tau

    # Estimate bandwidth if not given.
    if (missing(h) || !is.numeric(h)) {
        h <- estimate.bandwidth(X, k, nObs)
    }

    # Init a list of data indices (shuffled for batching).
    indices <- seq(n)
    I_p <- diag(1, p)

    # Init tracking of current best (across multiple attempts).
    V.best <- NULL
    loss.best <- Inf

    # Start loop for multiple attempts.
    for (attempt in 1:attempts) {
        # Reset learning rate `tau`.
        tau <- tau.init

        # Sample a starting basis from the Stiefel manifold.
        V <- rStiefl(p, q)
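        # (`rStiefl` is a package helper, assumed to draw a random p x q matrix
        # with orthonormal columns; each attempt restarts from a fresh random
        # basis and only the best result over all attempts is kept below.)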

        # Repeat `epochs` times.
        for (epoch in 1:epochs) {
            # Shuffle batches.
            batch.shuffle <- sample(indices)

            # Make a step for each batch.
            for (start in seq(1, n, batch.size)) {
                # Select batch data indices.
                batch <- batch.shuffle[start:(start + batch.size - 1)]
                # Remove `NA`s (when `n` isn't a multiple of `batch.size`).
                batch <- batch[!is.na(batch)]
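                # (E.g. with n = 50 and batch.size = 16 the loop starts are
                # 1, 17, 33, 49; the last batch indexes positions 49:64, so the
                # 14 out-of-range `NA` entries are dropped and it contains only
                # 2 observations.)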

                # Compute batch gradient.
                loss <- NULL
                G <- grad(X[batch, ], Y[batch], V, h)

                # Cayley transform matrix.
                A <- (G %*% t(V)) - (V %*% t(G))

                # Apply learning rate `tau`.
                A.tau <- tau * A
                # Move `V` along the Stiefel manifold in the direction of `G`
                # (Cayley transform step).
                V <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)
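                # (`A` is skew-symmetric, hence the Cayley map
                # `solve(I_p + A.tau) %*% (I_p - A.tau)` is orthogonal and the
                # columns of `V` remain orthonormal.)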
            }
        }
        # Compute actual loss after finishing optimization.
        loss <- grad(X, Y, V, h, loss.only = TRUE)
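        # (`grad` with `loss.only = TRUE` is assumed to return only the scalar
        # loss over the full data, without computing the gradient.)
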
        # After each attempt, check if the last attempt reached a better result.
        if (!is.null(V.best)) { # Only required if there is already a result.
            if (loss < loss.best) {
                loss.best <- loss
                V.best <- V
            }
        } else {
            loss.best <- loss
            V.best <- V
        }
    }

    return(list(
        X = X, Y = Y, k = k,
        nObs = nObs, h = h, tau = tau,
        epochs = epochs, batch.size = batch.size, attempts = attempts,
        loss = loss.best,
        V = V.best,
        B = null(V.best)
    ))
}
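
# Example (sketch, not run): assumes the package's internal helpers
# `estimate.bandwidth`, `rStiefl`, `grad` and `null` are available, i.e. this
# file is sourced as part of the package.
#
#   set.seed(21)
#   X <- matrix(rnorm(100 * 5), 100, 5)        # n = 100 samples, p = 5
#   Y <- X[, 1] + 0.25 * rnorm(100)            # response driven by one direction
#   fit <- cve_sgd(X, Y, k = 1, epochs = 10L, attempts = 3L)
#   fit$B   # estimated basis of the k-dimensional SDR space (p x k)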