// // Usage (bash): // ~$ R -e "library(Rcpp); sourceCpp('runtime_tests_grad.cpp')" // // Usage (R): // > library(Rcpp) // > sourceCpp('runtime_tests_grad.cpp') // // // #define ARMA_DONT_USE_WRAPPER // #define ARMA_DONT_USE_BLAS // [[Rcpp::depends(RcppArmadillo)]] #include #include // [[Rcpp::export]] arma::mat arma_grad(const arma::mat& X, const arma::mat& X_diff, const arma::vec& Y, const arma::vec& Y_rep, const arma::mat& V, const double h) { using namespace arma; uword n = X.n_rows; uword p = X.n_cols; // orthogonal projection matrix `Q = I - VV'` for dist computation mat Q = -(V * V.t()); Q.diag() += 1; // calc pairwise distances as `D` with `D(i, j) = d_i(V, X_j)` vec D_vec = sum(square(X_diff * Q), 1); mat D = reshape(D_vec, n, n); // calc weights as `W` with `W(i, j) = w_i(V, X_j)` mat W = exp(D / (-2.0 * h)); W = normalise(W, 1); // colomn-wise, 1-norm vec W_vec = vectorise(W); // centered weighted `Y` means vec y1 = W.t() * Y; vec y2 = W.t() * square(Y); // loss for each X_i, meaning `L(i) = L_n(V, X_i)` vec L = y2 - square(y1); // "global" loss double loss = mean(L); // `G = \nabla_V L_n(V)` a.k.a. gradient of `L` with respect to `V` vec scale = (repelem(L, n, 1) - square(Y_rep - repelem(y1, n, 1))) % W_vec % D_vec; mat X_diff_scale = X_diff.each_col() % scale; mat G = X_diff_scale.t() * X_diff * V; G *= -2.0 / (h * h * n); return G; } // ATTENTION: assumed `X` stores X_i's column wise, `X = cbind(X_1, X_2, ..., X_n)` // [[Rcpp::export]] arma::mat grad(const arma::mat& X, const arma::vec& Y, const arma::mat& V, const double h ) { using namespace arma; // get dimensions const uword n = X.n_cols; const uword p = X.n_rows; const uword q = V.n_cols; // compute orthogonal projection mat Q = -(V * V.t()); Q.diag() += 1.0; // distance matrix `D(i, j) = d_i(V, X_j)` mat D(n, n, fill::zeros); // weights matrix `W(i, j) = w_i(V, X_j)` mat W(n, n, fill::ones); // exp(0) = 1 double mvm = 0.0; // Matrix Vector Mult. double sos = 0.0; // Sum Of Squares // double wcn = 0.0; // Weights Column Norm for (uword j = 0; j + 1 < n; ++j) { for (uword i = j + 1; i < n; ++i) { sos = 0.0; for (uword k = 0; k < p; ++k) { mvm = 0.0; for (uword l = 0; l < p; ++l) { mvm += Q(k, l) * (X(l, i) - X(l, j)); } sos += mvm * mvm; } D(i, j) = D(j, i) = sos; W(i, j) = W(j, i) = std::exp(sos / (-2. * h)); } } // column normalization of weights `W` double col_sum; for (uword j = 0; j < n; ++j) { col_sum = 0.0; for (uword i = 0; i < n; ++i) { col_sum += W(i, j); } for (uword i = 0; i < n; ++i) { W(i, j) /= col_sum; } } // weighted first, second order responce means `y1`, `y2` // and component wise Loss `L(i) = L_n(V, X_i)` vec y1(n); vec y2(n); vec L(n); double tmp; double loss = 0.0; for (uword i = 0; i < n; ++i) { mvm = 0.0; // Matrix Vector Mult. sos = 0.0; // Sum Of Squared (weighted) for (uword k = 0; k < n; ++k) { mvm += (tmp = W(k, i) * Y(k)); sos += tmp * Y(k); // W(k, i) * Y(k) * Y(k) } y1(i) = mvm; y2(i) = sos; loss += (L(i) = sos - (mvm * mvm)); // L_n(V, X_i) = y2(i) - y1(i)^2 } loss /= n; mat S(n, n); for (uword k = 0; k < n; ++k) { for (uword l = 0; l < n; ++l) { tmp = Y(k) - y1(l); S(k, l) = (L(l) - (tmp * tmp)) * W(k, l) * D(k, l); } } // gradient mat G(p, q); double factor = -2. / (n * h * h); double gij; for (uword j = 0; j < q; ++j) { for (uword i = 0; i < p; ++i) { gij = 0.0; for (uword k = 0; k < n; ++k) { for (uword l = 0; l < n; ++l) { mvm = 0.0; for (uword m = 0; m < p; ++m) { mvm += (X(m, l) - X(m, k)) * V(m, j); } // gij += (S(k, l) + S(l, k)) * (X(i, l) - X(i, k)); gij += S(k, l) * (X(i, l) - X(i, k)) * mvm; } } G(i, j) = factor * gij; } } return G; } // ATTENTION: assumed `X` stores X_i's column wise, `X = cbind(X_1, X_2, ..., X_n)` // [[Rcpp::export]] arma::mat grad_p(const arma::mat& X_ref, const arma::vec& Y_ref, const arma::mat& V_ref, const double h ) { using arma::uword; // get dimensions const uword n = X_ref.n_cols; const uword p = X_ref.n_rows; const uword q = V_ref.n_cols; const double* X = X_ref.memptr(); const double* Y = Y_ref.memptr(); const double* V = V_ref.memptr(); // allocate memory for entire algorithm // Q (p,p) D+W+S (n,n) y1+L (n) G (p,q) uword memsize = (p * p) + (3 * n * n) + (2 * n) + (p * q); double* mem = static_cast(malloc(sizeof(double) * memsize)); // assign pointer to memory associated memory area double* Q = mem; double* D = Q + (p * p); double* W = D + (n * n); double* S = W + (n * n); double* y1 = S + (n * n); double* L = y1 + n; double* G = L + n; // compute orthogonal projection double sum; // compute orthogonal projection `Q = I_p - V V'` for (uword j = 0; j < p; ++j) { for (uword i = j; i < p; ++i) { sum = 0.0; for (uword k = 0; k < q; ++k) { sum += V[k * p + i] * V[k * p + j]; } if (i == j) { Q[j * p + i] = 1.0 - sum; } else { Q[j * p + i] = Q[i * p + j] = -sum; } } } // set `diag(D) = 0` and `diag(W) = 1`. for (uword i = 0; i < n * n; i += n + 1) { D[i] = 0.0; W[i] = 1.0; } // components (using symmetrie) of `D` and `W` (except `diag`) double mvm = 0.0; // Matrix Vector Mult. for (uword j = 0; j + 1 < n; ++j) { for (uword i = j + 1; i < n; ++i) { sum = 0.0; for (uword k = 0; k < p; ++k) { mvm = 0.0; for (uword l = 0; l < p; ++l) { mvm += Q[l * p + k] * (X[i * p + l] - X[j * p + l]); } sum += mvm * mvm; } D[j * n + i] = D[i * n + j] = sum; W[j * n + i] = W[i * n + j] = std::exp(sum / (-2. * h)); } } // column normalization of weights `W` for (uword j = 0; j < n; ++j) { sum = 0.0; for (uword i = 0; i < n; ++i) { sum += W[j * n + i]; } for (uword i = 0; i < n; ++i) { W[j * n + i] /= sum; } } // weighted first, secend order responce means `y1`, `y2` // and component wise Loss `L(i) = L_n(V, X_i)` double tmp; double loss = 0.0; for (uword i = 0; i < n; ++i) { mvm = 0.0; // Matrix Vector Mult. sum = 0.0; // Sum Of (weighted) Squares for (uword k = 0; k < n; ++k) { mvm += (tmp = W[i * n + k] * Y[k]); sum += tmp * Y[k]; } y1[i] = mvm; loss += (L[i] = sum - (mvm * mvm)); } loss /= n; // scaling for gradient summation // this scaling matrix is the lower triangular matrix defined as // // S_kl := (s_{kl} + s_{lk}) D_{kl} // s_ij := (L_n(V, X_j) - (Y_i - y1(V, X_j))^2) W_ij double factor; for (uword l = 0; l < n; ++l) { for (uword k = l + 1; k < n; ++k) { tmp = Y[k] - y1[l]; factor = (L[l] - (tmp * tmp)) * W[l * n + k]; // \tile{S}_{kl} tmp = Y[l] - y1[k]; factor += (L[k] - (tmp * tmp)) * W[k * n + l]; // \tile{S}_{lk} S[l * n + k] = factor * D[l * n + k]; // (s_kl + s_lk) * D_kl } } // gradient // reuse memory area of `Q` // no longer needed and provides enough space (`q < p`) double* GD = Q; const double* X_l; const double* X_k; for (uword j = 0; j < p; ++j) { for (uword i = 0; i < p; ++i) { sum = 0.0; for (uword l = 0; l < n; ++l) { X_l = X + (l * p); for (uword k = l + 1; k < n; ++k) { X_k = X + (k * p); sum += S[l * n + k] * (X_l[i] - X_k[i]) * (X_l[j] - X_k[j]); } } GD[j * p + i] = sum; } } // distance gradient `DG` to gradient by multiplying with `V` factor = -2. / (n * h * h); for (uword i = 0; i < p; ++i) { for (uword j = 0; j < q; ++j) { sum = 0.0; for (uword k = 0; k < p; ++k) { sum += GD[k * p + i] * V[j * p + k]; } G[j * p + i] = factor * sum; } } // construct 'Armadillo' matrix from `G`s memory area arma::mat Grad(G, p, q); // free entire allocated memory block free(mem); return Grad; } /*** R suppressMessages(library(microbenchmark)) cat("Start timing:\n") time.start <- Sys.time() rStiefl <- function(p, q) { return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q)))) } ## compare runtimes n <- 200L p <- 12L q <- p - 2L X <- matrix(rnorm(n * p), n, p) Xt <- t(X) X_diff <- kronecker(rep(1, n), X) - kronecker(X, rep(1, n)) Y <- rnorm(n) Y_rep <- kronecker(rep(1, n), Y) # repmat(Y, n, 1) h <- 1. / 4.; V <- rStiefl(p, q) # A <- arma_grad(X, X_diff, Y, Y_rep, V, h) # G1 <- grad(Xt, Y, V, h) # G2 <- grad_p(Xt, Y, V, h) # # print(round(A[1:6, 1:6], 3)) # print(round(G1[1:6, 1:6], 3)) # print(round(G2[1:6, 1:6], 3)) # print(round(abs(A - G1), 9)) # print(round(abs(A - G2), 9)) # # q() comp <- function (A, B, tol = sqrt(.Machine$double.eps)) { max(abs(A - B)) < tol } comp.all <- function (res) { if (length(res) < 2) { return(TRUE) } res.one = res[[1]] for (i in 2:length(res)) { if (!comp(res.one, res[[i]])) { return(FALSE) } } return(TRUE) } counter <- 0 setup.tests <- function () { if ((counter %% 3) == 0) { X <<- matrix(rnorm(n * p), n, p) Xt <<- t(X) X_diff <<- kronecker(rep(1, n), X) - kronecker(X, rep(1, n)) Y <<- rnorm(n) Y_rep <<- kronecker(rep(1, n), Y) # arma::repmat(Y, n, 1) h <<- 1. / 4.; V <<- rStiefl(p, q) } counter <<- counter + 1 } mbm <- microbenchmark( arma = arma_grad(X, X_diff, Y, Y_rep, V, h), # grad = grad(Xt, Y, V, h), grad_p = grad_p(Xt, Y, V, h), check = comp.all, setup = setup.tests(), times = 100L ) cat("\033[1m\033[92mTotal time:", format(Sys.time() - time.start), '\n') print(mbm) cat("\033[0m") boxplot(mbm, las = 2, xlab = NULL) */