105 lines
2.8 KiB
R
105 lines
2.8 KiB
R
library(microbenchmark)
|
|
|
|
elem.pairs <- function(elements) {
|
|
# Number of elements to match.
|
|
n <- length(elements)
|
|
# Create all combinations.
|
|
pairs <- rbind(rep(elements, each=n), rep(elements, n))
|
|
# Select unique combinations without self interaction.
|
|
return(pairs[, pairs[1, ] < pairs[2, ]])
|
|
}
|
|
|
|
rStiefl <- function(p, q) {
|
|
return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))))
|
|
}
|
|
|
|
grad <- function(X, Y, V, h, persistent = TRUE) {
|
|
n <- nrow(X)
|
|
p <- ncol(X)
|
|
|
|
# Projection matrix onto `span(V)`
|
|
Q <- diag(1, p) - tcrossprod(V, V)
|
|
# Vectorized distance matrix `D`.
|
|
vecD <- rowSums((X_diff %*% Q)^2)
|
|
|
|
# Weight matrix `W` (dnorm ... gaussean density function)
|
|
W <- matrix(1, n, n) # `exp(0) == 1`
|
|
W[lower] <- exp((-0.5 / h) * vecD^2) # Set lower tri. part
|
|
W[upper] <- t.default(W)[upper] # Mirror lower tri. to upper
|
|
W <- sweep(W, 2, colSums(W), FUN = `/`) # Col-Normalize
|
|
|
|
# Weighted `Y` momentums
|
|
y1 <- Y %*% W # Result is 1D -> transposition irrelevant
|
|
y2 <- Y^2 %*% W
|
|
# Per example loss `L(V, X_i)`
|
|
L <- y2 - y1^2
|
|
|
|
# Vectorized Weights with forced symmetry
|
|
vecS <- (L[i] - (Y[j] - y1[i])^2) * W[lower]
|
|
vecS <- vecS + ((L[j] - (Y[i] - y1[j])^2) * W[upper])
|
|
# Compute scaling of `X` row differences
|
|
vecS <- vecS * vecD
|
|
|
|
G <- crossprod(X_diff, X_diff * vecS) %*% V
|
|
G <- (-2 / (n * h^2)) * G
|
|
return(G)
|
|
}
|
|
|
|
grad2 <- function(X, Y, V, h, persistent = TRUE) {
|
|
n <- nrow(X)
|
|
p <- ncol(X)
|
|
|
|
# Projection matrix onto `span(V)`
|
|
Q <- diag(1, p) - tcrossprod(V, V)
|
|
# Vectorized distance matrix `D`.
|
|
# vecD <- rowSums((X_diff %*% Q)^2)
|
|
vecD <- colSums(tcrossprod(Q, X_diff)^2)
|
|
|
|
# Create Kernel matrix (aka. apply kernel to distances)
|
|
K <- matrix(1, n, n) # `exp(0) == 1`
|
|
K[lower] <- exp((-0.5 / h) * vecD^2) # Set lower tri. part
|
|
K[upper] <- t(K)[upper] # Mirror lower tri. to upper
|
|
|
|
# Weighted `Y` momentums
|
|
colSumsK <- colSums(K)
|
|
y1 <- (K %*% Y) / colSumsK
|
|
y2 <- (K %*% Y^2) / colSumsK
|
|
# Per example loss `L(V, X_i)`
|
|
L <- y2 - y1^2
|
|
|
|
# Compute scaling vector `vecS` for `X_diff`.
|
|
tmp <- kronecker(matrix(y1, n, 1), matrix(Y, 1, n), `-`)^2
|
|
tmp <- as.vector(L) - tmp
|
|
tmp <- tmp * K / colSumsK
|
|
vecS <- (tmp + t(tmp))[lower] * vecD
|
|
|
|
G <- crossprod(X_diff, X_diff * vecS) %*% V
|
|
G <- (-2 / (n * h^2)) * G
|
|
return(G)
|
|
}
|
|
|
|
n <- 200
|
|
p <- 12
|
|
q <- 10
|
|
|
|
X <- matrix(runif(n * p), n, p)
|
|
Y <- runif(n)
|
|
V <- rStiefl(p, q)
|
|
h <- 0.1
|
|
|
|
pair.index <- elem.pairs(seq(n))
|
|
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
|
|
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
|
|
lower <- ((i - 1) * n) + j
|
|
upper <- ((j - 1) * n) + i
|
|
X_diff <- X[i, , drop = F] - X[j, , drop = F]
|
|
|
|
stopifnot(all.equal(
|
|
grad(X, Y, V, h),
|
|
grad2(X, Y, V, h)
|
|
))
|
|
microbenchmark(
|
|
grad = grad(X, Y, V, h),
|
|
grad2 = grad2(X, Y, V, h)
|
|
)
|