library(microbenchmark) elem.pairs <- function(elements) { # Number of elements to match. n <- length(elements) # Create all combinations. pairs <- rbind(rep(elements, each=n), rep(elements, n)) # Select unique combinations without self interaction. return(pairs[, pairs[1, ] < pairs[2, ]]) } rStiefl <- function(p, q) { return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q)))) } grad <- function(X, Y, V, h, persistent = TRUE) { n <- nrow(X) p <- ncol(X) # Projection matrix onto `span(V)` Q <- diag(1, p) - tcrossprod(V, V) # Vectorized distance matrix `D`. vecD <- rowSums((X_diff %*% Q)^2) # Weight matrix `W` (dnorm ... gaussean density function) W <- matrix(1, n, n) # `exp(0) == 1` W[lower] <- exp((-0.5 / h) * vecD^2) # Set lower tri. part W[upper] <- t.default(W)[upper] # Mirror lower tri. to upper W <- sweep(W, 2, colSums(W), FUN = `/`) # Col-Normalize # Weighted `Y` momentums y1 <- Y %*% W # Result is 1D -> transposition irrelevant y2 <- Y^2 %*% W # Per example loss `L(V, X_i)` L <- y2 - y1^2 # Vectorized Weights with forced symmetry vecS <- (L[i] - (Y[j] - y1[i])^2) * W[lower] vecS <- vecS + ((L[j] - (Y[i] - y1[j])^2) * W[upper]) # Compute scaling of `X` row differences vecS <- vecS * vecD G <- crossprod(X_diff, X_diff * vecS) %*% V G <- (-2 / (n * h^2)) * G return(G) } grad2 <- function(X, Y, V, h, persistent = TRUE) { n <- nrow(X) p <- ncol(X) # Projection matrix onto `span(V)` Q <- diag(1, p) - tcrossprod(V, V) # Vectorized distance matrix `D`. # vecD <- rowSums((X_diff %*% Q)^2) vecD <- colSums(tcrossprod(Q, X_diff)^2) # Create Kernel matrix (aka. apply kernel to distances) K <- matrix(1, n, n) # `exp(0) == 1` K[lower] <- exp((-0.5 / h) * vecD^2) # Set lower tri. part K[upper] <- t(K)[upper] # Mirror lower tri. to upper # Weighted `Y` momentums colSumsK <- colSums(K) y1 <- (K %*% Y) / colSumsK y2 <- (K %*% Y^2) / colSumsK # Per example loss `L(V, X_i)` L <- y2 - y1^2 # Compute scaling vector `vecS` for `X_diff`. tmp <- kronecker(matrix(y1, n, 1), matrix(Y, 1, n), `-`)^2 tmp <- as.vector(L) - tmp tmp <- tmp * K / colSumsK vecS <- (tmp + t(tmp))[lower] * vecD G <- crossprod(X_diff, X_diff * vecS) %*% V G <- (-2 / (n * h^2)) * G return(G) } n <- 200 p <- 12 q <- 10 X <- matrix(runif(n * p), n, p) Y <- runif(n) V <- rStiefl(p, q) h <- 0.1 pair.index <- elem.pairs(seq(n)) i <- pair.index[1, ] # `i` indices of `(i, j)` pairs j <- pair.index[2, ] # `j` indices of `(i, j)` pairs lower <- ((i - 1) * n) + j upper <- ((j - 1) * n) + i X_diff <- X[i, , drop = F] - X[j, , drop = F] stopifnot(all.equal( grad(X, Y, V, h), grad2(X, Y, V, h) )) microbenchmark( grad = grad(X, Y, V, h), grad2 = grad2(X, Y, V, h) )