83 lines
3.0 KiB
R
83 lines
3.0 KiB
R
#' Compute get gradient of `L(V)` given a dataset `X`.
|
|
#'
|
|
#' @param X Data matrix.
|
|
#' @param Y Responce.
|
|
#' @param V Position to compute the gradient at, aka point on Stiefl manifold.
|
|
#' @param h Bandwidth
|
|
#' @param loss.out Iff \code{TRUE} loss will be written to parent environment.
|
|
#' @param loss.only Boolean to only compute the loss, of \code{TRUE} a single
|
|
#' value loss is returned and \code{envir} is ignored.
|
|
#' @param persistent Determines if data indices and dependent calculations shall
|
|
#' be reused from the parent environment. ATTENTION: Do NOT set this flag, only
|
|
#' intended for internal usage by carefully aligned functions!
|
|
#' @keywords internal
|
|
#' @export
|
|
grad <- function(X, Y, V, h,
|
|
loss.out = FALSE,
|
|
loss.only = FALSE,
|
|
persistent = FALSE) {
|
|
# Get number of samples and dimension.
|
|
n <- nrow(X)
|
|
p <- ncol(X)
|
|
|
|
if (!persistent) {
|
|
# Compute lookup indexes for symmetrie, lower/upper
|
|
# triangular parts and vectorization.
|
|
pair.index <- elem.pairs(seq(n))
|
|
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
|
|
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
|
|
# Index of vectorized matrix, for lower and upper triangular part.
|
|
lower <- ((i - 1) * n) + j
|
|
upper <- ((j - 1) * n) + i
|
|
|
|
# Create all pairewise differences of rows of `X`.
|
|
X_diff <- X[i, , drop = F] - X[j, , drop = F]
|
|
}
|
|
|
|
# Projection matrix onto `span(V)`
|
|
Q <- diag(1, p) - tcrossprod(V, V)
|
|
|
|
# Vectorized distance matrix `D`.
|
|
vecD <- colSums(tcrossprod(Q, X_diff)^2)
|
|
|
|
# Create Kernel matrix (aka. apply kernel to distances)
|
|
K <- matrix(1, n, n) # `exp(0) == 1`
|
|
K[lower] <- exp((-0.5 / h) * vecD^2) # Set lower tri. part
|
|
K[upper] <- t(K)[upper] # Mirror lower tri. to upper
|
|
|
|
# Weighted `Y` momentums
|
|
colSumsK <- colSums(K)
|
|
y1 <- (K %*% Y) / colSumsK
|
|
y2 <- (K %*% Y^2) / colSumsK
|
|
|
|
# Per example loss `L(V, X_i)`
|
|
L <- y2 - y1^2
|
|
if (loss.only) {
|
|
return(mean(L))
|
|
}
|
|
if (loss.out) {
|
|
loss <<- mean(L)
|
|
}
|
|
|
|
# Compute scaling vector `vecS` for `X_diff`.
|
|
tmp <- kronecker(matrix(y1, n, 1), matrix(Y, 1, n), `-`)^2
|
|
tmp <- as.vector(L) - tmp
|
|
tmp <- tmp * K / colSumsK
|
|
vecS <- (tmp + t(tmp))[lower] * vecD
|
|
|
|
# The gradient.
|
|
# 1. The `crossprod(A, B)` is equivalent to `t(A) %*% B`,
|
|
# 2. `(X_diff %*% V) * vecS` is first a marix matrix mult. and then using
|
|
# recycling to scale each row with the values of `vecS`.
|
|
# Note that `vecS` is a vector and that `R` uses column-major ordering
|
|
# of matrices.
|
|
# (See: notes for more details)
|
|
# TODO: Depending on n, p, q decide which version to take (for current
|
|
# datasets "inner" is faster, see: notes).
|
|
# inner = crossprod(X_diff, X_diff * vecS) %*% V,
|
|
# outer = crossprod(X_diff, (X_diff %*% V) * vecS)
|
|
G <- crossprod(X_diff, X_diff * vecS) %*% V
|
|
G <- (-2 / (n * h^2)) * G
|
|
return(G)
|
|
}
|