CVE/CVE_R/R/cve_sgd.R

#' Simple implementation of the CVE method. 'Simple' means that this method is
#' a classic (mini-batch) GD method using no further tricks.
#'
#' For each of `attempts` random starting points on the Stiefel manifold the
#' method runs up to `epochs` passes over the shuffled data, taking one
#' Cayley-transform gradient step per mini-batch. The attempt with the
#' smallest final full-data loss is returned.
#'
#' @param X data matrix, one sample per row (`nrow(X)` samples of dimension
#'  `ncol(X)`).
#' @param Y responses, indexed per sample as `Y[batch]`.
#' @param k dimension of the SDR space; the optimization runs over
#'  `ncol(X) x (ncol(X) - k)` matrices.
#' @param nObs passed on to `estimate.bandwidth` when `h` is not supplied.
#' @param h numeric bandwidth handed to `grad`. When missing or not numeric it
#'  is estimated via `estimate.bandwidth(X, k, nObs)`.
#' @param tau learning rate (step size) of each update.
#' @param tol tolerance of the break condition. It is scaled internally by
#'  `sqrt(2 * (ncol(X) - k))` and compared against the Frobenius distance of
#'  the projections `V V'` of two consecutive epochs.
#' @param epochs maximal number of passes over the data per attempt (>= 1).
#' @param batch.size number of samples per mini-batch (>= 1).
#' @param attempts number of random restarts (>= 1).
#'
#' @return A list with
#'  \describe{
#'      \item{loss.history}{`epochs x attempts` matrix of the full-data loss
#'          after each epoch (`NA` for epochs skipped by the break condition).}
#'      \item{error.history}{`epochs x attempts` matrix of the per-epoch
#'          change of the projection `V V'` (Frobenius norm).}
#'      \item{loss}{loss of the best attempt.}
#'      \item{V}{`V` of the best attempt.}
#'      \item{B}{`null(V)` of the best attempt.}
#'      \item{h}{the bandwidth that was used.}
#'  }
#'
#' @keywords internal
#' @export
cve_sgd <- function(X, Y, k,
                    nObs = sqrt(nrow(X)),
                    h = NULL,
                    tau = 0.01,
                    tol = 1e-3,
                    epochs = 50L,
                    batch.size = 16L,
                    attempts = 10L
) {
    # Set the environment of `grad` to this function's environment. This
    # creates a local copy of `grad` which can find (and manipulate) the local
    # variables defined below.
    # NOTE: because of this, local variables that look unused in this function
    # (`i`, `j`, `lower`, `upper`, `X_diff`, ...) may be read by name from
    # within `grad`; do not rename or remove them without checking `grad`.
    environment(grad) <- environment()

    # The loops below need at least one attempt, one epoch and a positive
    # batch size; fail early with a clear message otherwise.
    stopifnot(epochs >= 1, attempts >= 1, batch.size >= 1)

    # Setup histories.
    loss.history <- matrix(NA, epochs, attempts)
    error.history <- matrix(NA, epochs, attempts)

    # Get dimensions.
    n <- nrow(X) # Number of samples.
    p <- ncol(X) # Data dimensions
    q <- p - k   # Complement dimension of the SDR space.

    # Save initial learning rate `tau`.
    tau.init <- tau
    # Adapt tolerance for break condition. `sqrt(2 * q)` is the largest
    # possible Frobenius distance between two rank `q` orthogonal projections,
    # which makes `tol` a relative tolerance.
    tol <- sqrt(2 * q) * tol

    # Estimate bandwidth if not given.
    if (missing(h) || !is.numeric(h)) {
        h <- estimate.bandwidth(X, k, nObs)
    }

    # Compute persistent data.
    # Compute lookup indexes for symmetry, lower/upper
    # triangular parts and vectorization.
    pair.index <- elem.pairs(seq(n))
    i <- pair.index[, 1] # `i` indices of `(i, j)` pairs
    j <- pair.index[, 2] # `j` indices of `(i, j)` pairs
    # Matrix of vectorized indices. (vec(index) -> seq)
    index <- matrix(seq(n * n), n, n)
    lower <- index[lower.tri(index)]
    upper <- t(index)[lower]

    # Create all pairwise differences of rows of `X`.
    X_diff <- X[i, , drop = FALSE] - X[j, , drop = FALSE]
    # Identity matrix.
    I_p <- diag(1, p)
    # Init a list of data indices (shuffled for batching).
    indices <- seq(n)

    # Init tracking of current best (according multiple attempts).
    V.best <- NULL
    loss.best <- Inf

    # Start loop for multiple attempts.
    for (attempt in seq_len(attempts)) {
        # Reset learning rate `tau`.
        tau <- tau.init

        # Sample a `(p, q)` dimensional matrix from the Stiefel manifold as
        # optimization start value.
        V <- rStiefl(p, q)
        # Keep track of last `V` for computing error after an epoch.
        V.last <- V

        # Repeat `epochs` times
        for (epoch in seq_len(epochs)) {
            # Shuffle batches
            batch.shuffle <- sample(indices)

            # Make a step for each batch.
            for (start in seq(1, n, batch.size)) {
                # Select batch data indices. The last batch is shorter when
                # `n` isn't a multiple of `batch.size`.
                batch <- batch.shuffle[start:min(start + batch.size - 1, n)]

                # Compute batch gradient.
                # NOTE(review): `loss` is reset here since `grad` presumably
                # writes the batch loss into this environment when called
                # with `loss.out = TRUE` -- confirm against `grad`.
                loss <- NULL
                # `drop = FALSE` keeps `X[batch, ]` a matrix even if the batch
                # consists of a single sample.
                G <- grad(X[batch, , drop = FALSE], Y[batch], V, h,
                          loss.out = TRUE)

                # Cayley transform matrix (skew-symmetric by construction).
                A <- (G %*% t(V)) - (V %*% t(G))

                # Apply learning rate `tau`.
                A.tau <- tau * A
                # Parallel transport (on Stiefel manifold) into direction of
                # `G`. Solving the linear system directly avoids forming the
                # explicit inverse of `I_p + A.tau`.
                V <- solve(I_p + A.tau, (I_p - A.tau) %*% V)
            }
            # Compute actual loss (on the entire data) after finishing the
            # epoch.
            loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
            # And the error for the history.
            error <- norm(V.last %*% t(V.last) - V %*% t(V), type = "F")
            V.last <- V

            # Finally write history.
            loss.history[epoch, attempt] <- loss
            error.history[epoch, attempt] <- error

            # Check break condition.
            if (error < tol) {
                break
            }
        }
        # After each attempt, check if last attempt reached a better result.
        if (loss < loss.best) {
            loss.best <- loss
            V.best <- V
        }
    }

    list(
        loss.history = loss.history,
        error.history = error.history,
        loss = loss.best,
        V = V.best,
        B = null(V.best),
        h = h
    )
}
add: runtime test, add: new CVE pure R implementation, fix: small adaptation of legacy code to make it run, wip: ... 2019-08-30 19:16:52 +00:00			`#' Simple implementation of the CVE method. 'Simple' means that this method is`
			`#' a classic GD method unsing no further tricks.`
			`#'`
			`#' @keywords internal`
			`#' @export`
			`cve_sgd <- function(X, Y, k,`
			`nObs = sqrt(nrow(X)),`
			`h = NULL,`
			`tau = 0.01,`
add: cve_linesearch, fix: cleaned and optimized gradient, add. notes 2019-09-02 19:07:56 +00:00			`tol = 1e-3,`
add: runtime test, add: new CVE pure R implementation, fix: small adaptation of legacy code to make it run, wip: ... 2019-08-30 19:16:52 +00:00			`epochs = 50L,`
			`batch.size = 16L,`
			`attempts = 10L`
			`) {`
add: cve_linesearch, fix: cleaned and optimized gradient, add. notes 2019-09-02 19:07:56 +00:00			# Set `grad` functions environment to enable if to find this environments
			`# local variabels, needed to enable the manipulation of this local variables`
			# from within `grad`.
			`environment(grad) <- environment()`

			`# Setup histories.`
			`loss.history <- matrix(NA, epochs, attempts)`
			`error.history <- matrix(NA, epochs, attempts)`

add: runtime test, add: new CVE pure R implementation, fix: small adaptation of legacy code to make it run, wip: ... 2019-08-30 19:16:52 +00:00			`# Get dimensions.`
			`n <- nrow(X) # Number of samples.`
			`p <- ncol(X) # Data dimensions`
			`q <- p - k # Complement dimension of the SDR space.`

			# Save initial learning rate `tau`.
			`tau.init <- tau`
add: cve_linesearch, fix: cleaned and optimized gradient, add. notes 2019-09-02 19:07:56 +00:00			`# Addapt tolearance for break condition.`
			`tol <- sqrt(2 * q) * tol`
add: runtime test, add: new CVE pure R implementation, fix: small adaptation of legacy code to make it run, wip: ... 2019-08-30 19:16:52 +00:00
			`# Estaimate bandwidth if not given.`
			`if (missing(h) | !is.numeric(h)) {`
			`h <- estimate.bandwidth(X, k, nObs)`
			`}`

add: cve_linesearch, fix: cleaned and optimized gradient, add. notes 2019-09-02 19:07:56 +00:00			`# Compute persistent data.`
			`# Compute lookup indexes for symmetrie, lower/upper`
			`# triangular parts and vectorization.`
			`pair.index <- elem.pairs(seq(n))`
			i <- pair.index[, 1] # `i` indices of `(i, j)` pairs
			j <- pair.index[, 2] # `j` indices of `(i, j)` pairs
			`# Matrix of vectorized indices. (vec(index) -> seq)`
			`index <- matrix(seq(n * n), n, n)`
			`lower <- index[lower.tri(index)]`
			`upper <- t(index)[lower]`

			# Create all pairewise differences of rows of `X`.
			`X_diff <- X[i, , drop = F] - X[j, , drop = F]`
			`# Identity matrix.`
			`I_p <- diag(1, p)`
add: runtime test, add: new CVE pure R implementation, fix: small adaptation of legacy code to make it run, wip: ... 2019-08-30 19:16:52 +00:00			`# Init a list of data indices (shuffled for batching).`
			`indices <- seq(n)`

			`# Init tracking of current best (according multiple attempts).`
			`V.best <- NULL`
			`loss.best <- Inf`

			`# Start loop for multiple attempts.`
			`for (attempt in 1:attempts) {`
			# Reset learning rate `tau`.
			`tau <- tau.init`

fix: cve_simple runs correct, add: notes 2019-09-02 13:22:35 +00:00			# Sample a `(p, q)` dimensional matrix from the stiefel manifold as
			`# optimization start value.`
add: runtime test, add: new CVE pure R implementation, fix: small adaptation of legacy code to make it run, wip: ... 2019-08-30 19:16:52 +00:00			`V <- rStiefl(p, q)`
add: cve_linesearch, fix: cleaned and optimized gradient, add. notes 2019-09-02 19:07:56 +00:00			# Keep track of last `V` for computing error after an epoch.
			`V.last <- V`
add: runtime test, add: new CVE pure R implementation, fix: small adaptation of legacy code to make it run, wip: ... 2019-08-30 19:16:52 +00:00
			# Repeat `epochs` times
			`for (epoch in 1:epochs) {`
			`# Shuffle batches`
			`batch.shuffle <- sample(indices)`

			`# Make a step for each batch.`
			`for (start in seq(1, n, batch.size)) {`
			`# Select batch data indices.`
			`batch <- batch.shuffle[start:(start + batch.size - 1)]`
			# Remove `NA`'s (when `n` isn't a multiple of `batch.size`).
			`batch <- batch[!is.na(batch)]`

			`# Compute batch gradient.`
			`loss <- NULL`
add: cve_linesearch, fix: cleaned and optimized gradient, add. notes 2019-09-02 19:07:56 +00:00			`G <- grad(X[batch, ], Y[batch], V, h, loss.out = TRUE)`
add: runtime test, add: new CVE pure R implementation, fix: small adaptation of legacy code to make it run, wip: ... 2019-08-30 19:16:52 +00:00
			`# Cayley transform matrix.`
			`A <- (G %*% t(V)) - (V %*% t(G))`

			# Apply learning rate `tau`.
			`A.tau <- tau * A`
			# Parallet transport (on Stiefl manifold) into direction of `G`.
			`V <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)`
			`}`
add: cve_linesearch, fix: cleaned and optimized gradient, add. notes 2019-09-02 19:07:56 +00:00			`# Compute actuall loss after finishing optimization.`
			`loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)`
			`# And the error for the history.`
			`error <- norm(V.last %*% t(V.last) - V %*% t(V), type = "F")`
			`V.last <- V`

			`# Finaly write history.`
			`loss.history[epoch, attempt] <- loss`
			`error.history[epoch, attempt] <- error`

			`# Check break condition.`
			`if (error < tol) {`
			`break()`
			`}`
add: runtime test, add: new CVE pure R implementation, fix: small adaptation of legacy code to make it run, wip: ... 2019-08-30 19:16:52 +00:00			`}`
			`# After each attempt, check if last attempt reached a better result.`
add: cve_linesearch, fix: cleaned and optimized gradient, add. notes 2019-09-02 19:07:56 +00:00			`if (loss < loss.best) {`
add: runtime test, add: new CVE pure R implementation, fix: small adaptation of legacy code to make it run, wip: ... 2019-08-30 19:16:52 +00:00			`loss.best <- loss`
			`V.best <- V`
			`}`
			`}`

			`return(list(`
add: cve_linesearch, fix: cleaned and optimized gradient, add. notes 2019-09-02 19:07:56 +00:00			`loss.history = loss.history,`
			`error.history = error.history,`
add: runtime test, add: new CVE pure R implementation, fix: small adaptation of legacy code to make it run, wip: ... 2019-08-30 19:16:52 +00:00			`loss = loss.best,`
			`V = V.best,`
add: cve_linesearch, fix: cleaned and optimized gradient, add. notes 2019-09-02 19:07:56 +00:00			`B = null(V.best),`
			`h = h`
add: runtime test, add: new CVE pure R implementation, fix: small adaptation of legacy code to make it run, wip: ... 2019-08-30 19:16:52 +00:00			`))`
			`}`