CVE/CVE_R/R/gradient.R

#' Compute get gradient of `L(V)` given a dataset `X`.
#'
#' @param X Data matrix.
#' @param Y Responce.
#' @param V Position to compute the gradient at, aka point on Stiefl manifold.
#' @param h Bandwidth
#' @param loss.out Iff \code{TRUE} loss will be written to parent environment.
#' @param loss.only Boolean to only compute the loss, of \code{TRUE} a single
#'  value loss is returned and \code{envir} is ignored.
#' @param persistent Determines if data indices and dependent calculations shall
#'  be reused from the parent environment. ATTENTION: Do NOT set this flag, only
#'  intended for internal usage by carefully aligned functions!
#' @keywords internal
#' @export
grad <- function(X, Y, V, h,
                 loss.out = FALSE,
                 loss.only = FALSE,
                 persistent = FALSE) {
    # Get number of samples and dimension.
    n <- nrow(X)
    p <- ncol(X)

    if (!persistent) {
        # Compute lookup indexes for symmetrie, lower/upper
        # triangular parts and vectorization.
        pair.index <- elem.pairs(seq(n))
        i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
        j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
        # Index of vectorized matrix, for lower and upper triangular part.
        lower <- ((i - 1) * n) + j
        upper <- ((j - 1) * n) + i

        # Create all pairewise differences of rows of `X`.
        X_diff <- X[i, , drop = F] - X[j, , drop = F]
    }

    # Projection matrix onto `span(V)`
    Q <- diag(1, p) - tcrossprod(V, V)

    # Vectorized distance matrix `D`.
    vecD <- colSums(tcrossprod(Q, X_diff)^2)

    # Create Kernel matrix (aka. apply kernel to distances)
    K <- matrix(1, n, n) # `exp(0) == 1`
    K[lower] <- exp((-0.5 / h) * vecD^2) # Set lower tri. part
    K[upper] <- t(K)[upper] # Mirror lower tri. to upper

    # Weighted `Y` momentums
    colSumsK <- colSums(K)
    y1 <- (K %*% Y) / colSumsK
    y2 <- (K %*% Y^2) / colSumsK

    # Per example loss `L(V, X_i)`
    L <- y2 - y1^2
    if (loss.only) {
        return(mean(L))
    }
    if (loss.out) {
        loss <<- mean(L)
    }

    # Compute scaling vector `vecS` for `X_diff`.
    tmp <- kronecker(matrix(y1, n, 1), matrix(Y, 1, n), `-`)^2
    tmp <- as.vector(L) - tmp
    tmp <- tmp * K / colSumsK
    vecS <- (tmp + t(tmp))[lower] * vecD

    # The gradient.
    # 1. The `crossprod(A, B)` is equivalent to `t(A) %*% B`,
    # 2. `(X_diff %*% V) * vecS` is first a marix matrix mult. and then using
    #    recycling to scale each row with the values of `vecS`.
    #    Note that `vecS` is a vector and that `R` uses column-major ordering
    #    of matrices.
    # (See: notes for more details)
    # TODO: Depending on n, p, q decide which version to take (for current
    #    datasets "inner" is faster, see: notes).
    #    inner = crossprod(X_diff, X_diff * vecS) %*% V,
    #    outer = crossprod(X_diff, (X_diff %*% V) * vecS)
    G <- crossprod(X_diff, X_diff * vecS) %*% V
    G <- (-2 / (n * h^2)) * G
    return(G)
}
add: runtime test, add: new CVE pure R implementation, fix: small adaptation of legacy code to make it run, wip: ... 2019-08-30 19:16:52 +00:00			#' Compute get gradient of `L(V)` given a dataset `X`.
			`#'`
			`#' @param X Data matrix.`
			`#' @param Y Responce.`
			`#' @param V Position to compute the gradient at, aka point on Stiefl manifold.`
			`#' @param h Bandwidth`
add: cve_linesearch, fix: cleaned and optimized gradient, add. notes 2019-09-02 19:07:56 +00:00			`#' @param loss.out Iff \code{TRUE} loss will be written to parent environment.`
add: runtime test, add: new CVE pure R implementation, fix: small adaptation of legacy code to make it run, wip: ... 2019-08-30 19:16:52 +00:00			`#' @param loss.only Boolean to only compute the loss, of \code{TRUE} a single`
			`#' value loss is returned and \code{envir} is ignored.`
add: cve_linesearch, fix: cleaned and optimized gradient, add. notes 2019-09-02 19:07:56 +00:00			`#' @param persistent Determines if data indices and dependent calculations shall`
			`#' be reused from the parent environment. ATTENTION: Do NOT set this flag, only`
			`#' intended for internal usage by carefully aligned functions!`
add: runtime test, add: new CVE pure R implementation, fix: small adaptation of legacy code to make it run, wip: ... 2019-08-30 19:16:52 +00:00			`#' @keywords internal`
			`#' @export`
add: cve_linesearch, fix: cleaned and optimized gradient, add. notes 2019-09-02 19:07:56 +00:00			`grad <- function(X, Y, V, h,`
			`loss.out = FALSE,`
			`loss.only = FALSE,`
			`persistent = FALSE) {`
add: runtime test, add: new CVE pure R implementation, fix: small adaptation of legacy code to make it run, wip: ... 2019-08-30 19:16:52 +00:00			`# Get number of samples and dimension.`
			`n <- nrow(X)`
			`p <- ncol(X)`

add: cve_linesearch, fix: cleaned and optimized gradient, add. notes 2019-09-02 19:07:56 +00:00			`if (!persistent) {`
			`# Compute lookup indexes for symmetrie, lower/upper`
			`# triangular parts and vectorization.`
			`pair.index <- elem.pairs(seq(n))`
wip: writing C gradient version 2019-09-12 16:42:28 +00:00			i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
			j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
			`# Index of vectorized matrix, for lower and upper triangular part.`
			`lower <- ((i - 1) * n) + j`
			`upper <- ((j - 1) * n) + i`
add: runtime test, add: new CVE pure R implementation, fix: small adaptation of legacy code to make it run, wip: ... 2019-08-30 19:16:52 +00:00
add: cve_linesearch, fix: cleaned and optimized gradient, add. notes 2019-09-02 19:07:56 +00:00			# Create all pairewise differences of rows of `X`.
			`X_diff <- X[i, , drop = F] - X[j, , drop = F]`
			`}`
fix: cve_simple runs correct, add: notes 2019-09-02 13:22:35 +00:00
add: runtime test, add: new CVE pure R implementation, fix: small adaptation of legacy code to make it run, wip: ... 2019-08-30 19:16:52 +00:00			# Projection matrix onto `span(V)`
add: cve_linesearch, fix: cleaned and optimized gradient, add. notes 2019-09-02 19:07:56 +00:00			`Q <- diag(1, p) - tcrossprod(V, V)`
add: runtime test, add: new CVE pure R implementation, fix: small adaptation of legacy code to make it run, wip: ... 2019-08-30 19:16:52 +00:00
			# Vectorized distance matrix `D`.
wip: writing C gradient version 2019-09-12 16:42:28 +00:00			`vecD <- colSums(tcrossprod(Q, X_diff)^2)`
add: runtime test, add: new CVE pure R implementation, fix: small adaptation of legacy code to make it run, wip: ... 2019-08-30 19:16:52 +00:00
Improved runtime of pure R grad. 2019-09-16 09:57:10 +00:00			`# Create Kernel matrix (aka. apply kernel to distances)`
			K <- matrix(1, n, n) # `exp(0) == 1`
			`K[lower] <- exp((-0.5 / h) * vecD^2) # Set lower tri. part`
			`K[upper] <- t(K)[upper] # Mirror lower tri. to upper`
add: runtime test, add: new CVE pure R implementation, fix: small adaptation of legacy code to make it run, wip: ... 2019-08-30 19:16:52 +00:00
			# Weighted `Y` momentums
Improved runtime of pure R grad. 2019-09-16 09:57:10 +00:00			`colSumsK <- colSums(K)`
			`y1 <- (K %*% Y) / colSumsK`
			`y2 <- (K %*% Y^2) / colSumsK`
add: runtime test, add: new CVE pure R implementation, fix: small adaptation of legacy code to make it run, wip: ... 2019-08-30 19:16:52 +00:00
			# Per example loss `L(V, X_i)`
			`L <- y2 - y1^2`
add: cve_linesearch, fix: cleaned and optimized gradient, add. notes 2019-09-02 19:07:56 +00:00			`if (loss.only) {`
			`return(mean(L))`
			`}`
			`if (loss.out) {`
			`loss <<- mean(L)`
add: runtime test, add: new CVE pure R implementation, fix: small adaptation of legacy code to make it run, wip: ... 2019-08-30 19:16:52 +00:00			`}`

Improved runtime of pure R grad. 2019-09-16 09:57:10 +00:00			# Compute scaling vector `vecS` for `X_diff`.
			tmp <- kronecker(matrix(y1, n, 1), matrix(Y, 1, n), `-`)^2
			`tmp <- as.vector(L) - tmp`
			`tmp <- tmp * K / colSumsK`
			`vecS <- (tmp + t(tmp))[lower] * vecD`
add: runtime test, add: new CVE pure R implementation, fix: small adaptation of legacy code to make it run, wip: ... 2019-08-30 19:16:52 +00:00
			`# The gradient.`
add: to notes, fix: bottleneck in grad 2019-09-03 18:43:34 +00:00			# 1. The `crossprod(A, B)` is equivalent to `t(A) %*% B`,
			# 2. `(X_diff %% V) vecS` is first a marix matrix mult. and then using
			# recycling to scale each row with the values of `vecS`.
			# Note that `vecS` is a vector and that `R` uses column-major ordering
			`# of matrices.`
			`# (See: notes for more details)`
			`# TODO: Depending on n, p, q decide which version to take (for current`
			`# datasets "inner" is faster, see: notes).`
			`# inner = crossprod(X_diff, X_diff * vecS) %*% V,`
			`# outer = crossprod(X_diff, (X_diff %% V) vecS)`
			`G <- crossprod(X_diff, X_diff * vecS) %*% V`
add: runtime test, add: new CVE pure R implementation, fix: small adaptation of legacy code to make it run, wip: ... 2019-08-30 19:16:52 +00:00			`G <- (-2 / (n * h^2)) * G`
			`return(G)`
			`}`