2
0
Fork 0
CVE/CVE_R/R/gradient.R

84 lines
3.1 KiB
R

#' Compute get gradient of `L(V)` given a dataset `X`.
#'
#' @param X Data matrix.
#' @param Y Responce.
#' @param V Position to compute the gradient at, aka point on Stiefl manifold.
#' @param h Bandwidth
#' @param loss.out Iff \code{TRUE} loss will be written to parent environment.
#' @param loss.only Boolean to only compute the loss, of \code{TRUE} a single
#' value loss is returned and \code{envir} is ignored.
#' @param persistent Determines if data indices and dependent calculations shall
#' be reused from the parent environment. ATTENTION: Do NOT set this flag, only
#' intended for internal usage by carefully aligned functions!
#' @keywords internal
#' @export
grad <- function(X, Y, V, h,
loss.out = FALSE,
loss.only = FALSE,
persistent = FALSE) {
# Get number of samples and dimension.
n <- nrow(X)
p <- ncol(X)
if (!persistent) {
# Compute lookup indexes for symmetrie, lower/upper
# triangular parts and vectorization.
pair.index <- elem.pairs(seq(n))
i <- pair.index[, 1] # `i` indices of `(i, j)` pairs
j <- pair.index[, 2] # `j` indices of `(i, j)` pairs
# Matrix of vectorized indices. (vec(index) -> seq)
index <- matrix(seq(n * n), n, n)
lower <- index[lower.tri(index)]
upper <- t.default(index)[lower]
# Create all pairewise differences of rows of `X`.
X_diff <- X[i, , drop = F] - X[j, , drop = F]
}
# Projection matrix onto `span(V)`
Q <- diag(1, p) - tcrossprod(V, V)
# Vectorized distance matrix `D`.
vecD <- rowSums((X_diff %*% Q)^2)
# Weight matrix `W` (dnorm ... gaussean density function)
W <- matrix(1, n, n) # `exp(0) == 1`
W[lower] <- exp((-0.5 / h) * vecD^2) # Set lower tri. part
W[upper] <- t.default(W)[upper] # Mirror lower tri. to upper
W <- sweep(W, 2, colSums(W), FUN = `/`) # Col-Normalize
# Weighted `Y` momentums
y1 <- Y %*% W # Result is 1D -> transposition irrelevant
y2 <- Y^2 %*% W
# Per example loss `L(V, X_i)`
L <- y2 - y1^2
if (loss.only) {
return(mean(L))
}
if (loss.out) {
loss <<- mean(L)
}
# Vectorized Weights with forced symmetry
vecS <- (L[i] - (Y[j] - y1[i])^2) * W[lower]
vecS <- vecS + ((L[j] - (Y[i] - y1[j])^2) * W[upper])
# Compute scaling of `X` row differences
vecS <- vecS * vecD
# The gradient.
# 1. The `crossprod(A, B)` is equivalent to `t(A) %*% B`,
# 2. `(X_diff %*% V) * vecS` is first a marix matrix mult. and then using
# recycling to scale each row with the values of `vecS`.
# Note that `vecS` is a vector and that `R` uses column-major ordering
# of matrices.
# (See: notes for more details)
# TODO: Depending on n, p, q decide which version to take (for current
# datasets "inner" is faster, see: notes).
# inner = crossprod(X_diff, X_diff * vecS) %*% V,
# outer = crossprod(X_diff, (X_diff %*% V) * vecS)
G <- crossprod(X_diff, X_diff * vecS) %*% V
G <- (-2 / (n * h^2)) * G
return(G)
}