2019-08-30 19:16:52 +00:00
|
|
|
#' Compute get gradient of `L(V)` given a dataset `X`.
|
|
|
|
#'
|
|
|
|
#' @param X Data matrix.
|
|
|
|
#' @param Y Responce.
|
|
|
|
#' @param V Position to compute the gradient at, aka point on Stiefl manifold.
|
|
|
|
#' @param h Bandwidth
|
|
|
|
#' @param loss.only Boolean to only compute the loss, of \code{TRUE} a single
|
|
|
|
#' value loss is returned and \code{envir} is ignored.
|
|
|
|
#' @keywords internal
|
|
|
|
#' @export
|
2019-09-02 13:22:35 +00:00
|
|
|
grad <- function(X, Y, V, h, loss.out = FALSE, loss.log = FALSE, loss.only = FALSE) {
|
2019-08-30 19:16:52 +00:00
|
|
|
# Get number of samples and dimension.
|
|
|
|
n <- nrow(X)
|
|
|
|
p <- ncol(X)
|
|
|
|
|
|
|
|
# Compute lookup indexes for symmetrie, lower/upper
|
|
|
|
# triangular parts and vectorization.
|
|
|
|
pair.index <- elem.pairs(seq(n))
|
|
|
|
i <- pair.index[, 1] # `i` indices of `(i, j)` pairs
|
|
|
|
j <- pair.index[, 2] # `j` indices of `(i, j)` pairs
|
|
|
|
# Matrix of vectorized indices. (vec(index) -> seq)
|
|
|
|
index <- matrix(seq(n * n), n, n)
|
|
|
|
lower <- index[lower.tri(index)]
|
|
|
|
upper <- t(index)[lower]
|
|
|
|
|
2019-09-02 13:22:35 +00:00
|
|
|
# Create all pairewise differences of rows of `X`.
|
|
|
|
X_diff <- X[i, , drop = F] - X[j, , drop = F]
|
|
|
|
|
2019-08-30 19:16:52 +00:00
|
|
|
# Projection matrix onto `span(V)`
|
|
|
|
Q <- diag(1, p) - (V %*% t(V))
|
|
|
|
|
|
|
|
# Vectorized distance matrix `D`.
|
|
|
|
vecD <- rowSums((X_diff %*% Q)^2)
|
|
|
|
|
|
|
|
# Weight matrix `W` (dnorm ... gaussean density function)
|
|
|
|
W <- matrix(dnorm(0), n, n)
|
|
|
|
W[lower] <- dnorm(vecD / h) # Set lower tri. part
|
|
|
|
W[upper] <- t(W)[upper] # Mirror lower tri. to upper
|
2019-09-02 13:22:35 +00:00
|
|
|
W <- sweep(W, 2, colSums(W), FUN = `/`) # Col-Normalize
|
2019-08-30 19:16:52 +00:00
|
|
|
|
|
|
|
# Weighted `Y` momentums
|
|
|
|
y1 <- Y %*% W # Result is 1D -> transposition irrelevant
|
|
|
|
y2 <- Y^2 %*% W
|
|
|
|
|
|
|
|
# Per example loss `L(V, X_i)`
|
|
|
|
L <- y2 - y1^2
|
2019-09-02 13:22:35 +00:00
|
|
|
if (loss.out | loss.log | loss.only) {
|
|
|
|
meanL <- mean(L)
|
|
|
|
if (loss.out) {
|
|
|
|
# Bubble environments up and write to loss variable, aka out param.
|
|
|
|
loss <<- meanL
|
|
|
|
}
|
|
|
|
if (loss.log) {
|
|
|
|
loss.history[epoch, attempt] <<- meanL
|
|
|
|
}
|
|
|
|
if (loss.only) {
|
|
|
|
# Mean for total loss `L(V)`.
|
|
|
|
return(meanL)
|
|
|
|
}
|
2019-08-30 19:16:52 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
# Vectorized Weights with forced symmetry
|
|
|
|
vecS <- (L[i] - (Y[j] - y1[i])^2) * W[lower]
|
|
|
|
vecS <- vecS + ((L[j] - (Y[i] - y1[j])^2) * W[upper])
|
|
|
|
# Compute scaling of `X` row differences
|
|
|
|
vecS <- vecS * vecD
|
|
|
|
|
|
|
|
# The gradient.
|
|
|
|
G <- t(X_diff) %*% sweep(X_diff %*% V, 1, vecS, `*`)
|
|
|
|
G <- (-2 / (n * h^2)) * G
|
|
|
|
return(G)
|
|
|
|
}
|