From 998f8d3568881a6e03e61e5a61319c3422dde368 Mon Sep 17 00:00:00 2001 From: daniel Date: Mon, 16 Sep 2019 11:28:06 +0200 Subject: [PATCH] cleanup --- benchmark.R | 348 ++++++++++++++++++++++++++++++++++++ wip.c => benchmark.c | 0 wip.h => benchmark.h | 0 build_install.R | 43 +++++ runtime_tests_grad.cpp | 394 ----------------------------------------- test.R | 60 +++++++ validate.R | 104 ----------- wip.R | 322 ++++----------------------------- 8 files changed, 490 insertions(+), 781 deletions(-) create mode 100644 benchmark.R rename wip.c => benchmark.c (100%) rename wip.h => benchmark.h (100%) create mode 100644 build_install.R delete mode 100644 runtime_tests_grad.cpp create mode 100644 test.R delete mode 100644 validate.R diff --git a/benchmark.R b/benchmark.R new file mode 100644 index 0000000..25eb66d --- /dev/null +++ b/benchmark.R @@ -0,0 +1,348 @@ +library(microbenchmark) + +dyn.load("wip.so") + + +## rowSum* .call -------------------------------------------------------------- +rowSums.c <- function(M) { + stopifnot( + is.matrix(M), + is.numeric(M) + ) + if (!is.double(M)) { + M <- matrix(as.double(M), nrow = nrow(M)) + } + + .Call('R_rowSums', PACKAGE = 'wip', M) +} +colSums.c <- function(M) { + stopifnot( + is.matrix(M), + is.numeric(M) + ) + if (!is.double(M)) { + M <- matrix(as.double(M), nrow = nrow(M)) + } + + .Call('R_colSums', PACKAGE = 'wip', M) +} +rowSquareSums.c <- function(M) { + stopifnot( + is.matrix(M), + is.numeric(M) + ) + if (!is.double(M)) { + M <- matrix(as.double(M), nrow = nrow(M)) + } + + .Call('R_rowSquareSums', PACKAGE = 'wip', M) +} +rowSumsSymVec.c <- function(vecA, nrow, diag = 0.0) { + stopifnot( + is.vector(vecA), + is.numeric(vecA), + is.numeric(diag), + nrow * (nrow - 1) == length(vecA) * 2 + ) + if (!is.double(vecA)) { + vecA <- as.double(vecA) + } + .Call('R_rowSumsSymVec', PACKAGE = 'wip', + vecA, as.integer(nrow), as.double(diag)) +} +rowSweep.c <- function(A, v, op = '-') { + stopifnot( + is.matrix(A), + is.numeric(v) + ) + if (!is.double(A)) { + A <- matrix(as.double(A), nrow = nrow(A)) + } + if (!is.vector(v) || !is.double(v)) { + v <- as.double(v) + } + stopifnot( + nrow(A) == length(v), + op %in% c('+', '-', '*', '/') + ) + + .Call('R_rowSweep', PACKAGE = 'wip', A, v, op) +} + +## row*, col* tests ------------------------------------------------------------ +n <- 3000 +M <- matrix(runif(n * 12), n, 12) +stopifnot( + all.equal(rowSums(M^2), rowSums.c(M^2)), + all.equal(colSums(M), colSums.c(M)) +) +microbenchmark( + rowSums = rowSums(M^2), + rowSums.c = rowSums.c(M^2), + rowSqSums.c = rowSquareSums.c(M) +) +microbenchmark( + colSums = colSums(M), + colSums.c = colSums.c(M) +) + +sum = rowSums(M) +stopifnot(all.equal( + sweep(M, 1, sum, FUN = `/`), + rowSweep.c(M, sum, '/') # Col-Normalize) +), all.equal( + sweep(M, 1, sum, FUN = `/`), + M / sum +)) +microbenchmark( + sweep = sweep(M, 1, sum, FUN = `/`), + M / sum, + rowSweep.c = rowSweep.c(M, sum, '/') # Col-Normalize) +) + +# Create symmetric matrix with constant diagonal entries. +nrow <- 200 +diag <- 1.0 +Sym <- tcrossprod(runif(nrow)) +diag(Sym) <- diag +# Get vectorized lower triangular part of `Sym` matrix. 
+SymVec <- Sym[lower.tri(Sym)] +stopifnot(all.equal( + rowSums(Sym), + rowSumsSymVec.c(SymVec, nrow, diag) +)) +microbenchmark( + rowSums = rowSums(Sym), + rowSums.c = rowSums.c(Sym), + rowSumsSymVec.c = rowSumsSymVec.c(SymVec, nrow, diag) +) + +## Matrix-Matrix opperation .call --------------------------------------------- +transpose.c <- function(A) { + stopifnot( + is.matrix(A), is.numeric(A) + ) + if (!is.double(A)) { + A <- matrix(as.double(A), nrow(A), ncol(A)) + } + + .Call('R_transpose', PACKAGE = 'wip', A) +} + +matrixprod.c <- function(A, B) { + stopifnot( + is.matrix(A), is.numeric(A), + is.matrix(B), is.numeric(B), + ncol(A) == nrow(B) + ) + if (!is.double(A)) { + A <- matrix(as.double(A), nrow = nrow(A)) + } + if (!is.double(B)) { + B <- matrix(as.double(B), nrow = nrow(B)) + } + + .Call('R_matrixprod', PACKAGE = 'wip', A, B) +} +crossprod.c <- function(A, B) { + stopifnot( + is.matrix(A), is.numeric(A), + is.matrix(B), is.numeric(B), + nrow(A) == nrow(B) + ) + if (!is.double(A)) { + A <- matrix(as.double(A), nrow = nrow(A)) + } + if (!is.double(B)) { + B <- matrix(as.double(B), nrow = nrow(B)) + } + + .Call('R_crossprod', PACKAGE = 'wip', A, B) +} +skewSymRank2k.c <- function(A, B, alpha = 1, beta = 0) { + stopifnot( + is.matrix(A), is.numeric(A), + is.matrix(B), is.numeric(B), + all(dim(A) == dim(B)), + is.numeric(alpha), length(alpha) == 1L, + is.numeric(beta), length(beta) == 1L + ) + if (!is.double(A)) { + A <- matrix(as.double(A), nrow = nrow(A)) + } + if (!is.double(B)) { + B <- matrix(as.double(B), nrow = nrow(B)) + } + + .Call('R_skewSymRank2k', PACKAGE = 'wip', A, B, + as.double(alpha), as.double(beta)) +} + +## Matrix-Matrix opperation tests --------------------------------------------- +n <- 200 +k <- 100 +m <- 300 + +A <- matrix(runif(n * k), n, k) +B <- matrix(runif(k * m), k, m) +stopifnot( + all.equal(t(A), transpose.c(A)) +) +microbenchmark( + t(A), + transpose.c(A) +) + +stopifnot( + all.equal(A %*% B, matrixprod.c(A, B)) +) +microbenchmark( + "%*%" = A %*% B, + matrixprod.c = matrixprod.c(A, B) +) + +A <- matrix(runif(k * n), k, n) +B <- matrix(runif(k * m), k, m) +stopifnot( + all.equal(crossprod(A, B), crossprod.c(A, B)) +) +microbenchmark( + crossprod = crossprod(A, B), + crossprod.c = crossprod.c(A, B) +) + +n <- 12 +k <- 11 +A <- matrix(runif(n * k), n, k) +B <- matrix(runif(n * k), n, k) +stopifnot(all.equal( + A %*% t(B) - B %*% t(A), skewSymRank2k.c(A, B) +)) +microbenchmark( + A %*% t(B) - B %*% t(A), + skewSymRank2k.c(A, B) +) + +## Orthogonal projection onto null space .Call -------------------------------- +nullProj.c <- function(B) { + stopifnot( + is.matrix(B), is.numeric(B) + ) + if (!is.double(B)) { + B <- matrix(as.double(B), nrow = nrow(B)) + } + + .Call('R_nullProj', PACKAGE = 'wip', B) +} +## Orthogonal projection onto null space tests -------------------------------- +p <- 12 +q <- 10 +V <- qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))) + +# Projection matrix onto `span(V)` +Q <- diag(1, p) - tcrossprod(V, V) +stopifnot( + all.equal(Q, nullProj.c(V)) +) +microbenchmark( + nullProj = diag(1, p) - tcrossprod(V, V), + nullProj.c = nullProj.c(V) +) + + +# ## WIP for gradient. 
---------------------------------------------------------- + +gradient.c <- function(X, X_diff, Y, V, h) { + stopifnot( + is.matrix(X), is.double(X), + is.matrix(X_diff), is.double(X_diff), + ncol(X_diff) == ncol(X), nrow(X_diff) == nrow(X) * (nrow(X) - 1) / 2, + is.vector(Y) || (is.matrix(Y) && pmin(dim(Y)) == 1L), is.double(Y), + length(Y) == nrow(X), + is.matrix(V), is.double(V), + nrow(V) == ncol(X), + is.vector(h), is.numeric(h), length(h) == 1 + ) + + .Call('R_gradient', PACKAGE = 'wip', + X, X_diff, as.double(Y), V, as.double(h)); +} + +elem.pairs <- function(elements) { + # Number of elements to match. + n <- length(elements) + # Create all combinations. + pairs <- rbind(rep(elements, each=n), rep(elements, n)) + # Select unique combinations without self interaction. + return(pairs[, pairs[1, ] < pairs[2, ]]) +} +grad <- function(X, Y, V, h, persistent = TRUE) { + n <- nrow(X) + p <- ncol(X) + + if (!persistent) { + pair.index <- elem.pairs(seq(n)) + i <- pair.index[, 1] # `i` indices of `(i, j)` pairs + j <- pair.index[, 2] # `j` indices of `(i, j)` pairs + lower <- ((i - 1) * n) + j + upper <- ((j - 1) * n) + i + X_diff <- X[i, , drop = F] - X[j, , drop = F] + } + + # Projection matrix onto `span(V)` + Q <- diag(1, p) - tcrossprod(V, V) + # Vectorized distance matrix `D`. + vecD <- rowSums((X_diff %*% Q)^2) + + + # Weight matrix `W` (dnorm ... gaussean density function) + W <- matrix(1, n, n) # `exp(0) == 1` + W[lower] <- exp((-0.5 / h) * vecD^2) # Set lower tri. part + W[upper] <- t.default(W)[upper] # Mirror lower tri. to upper + W <- sweep(W, 2, colSums(W), FUN = `/`) # Col-Normalize + + # Weighted `Y` momentums + y1 <- Y %*% W # Result is 1D -> transposition irrelevant + y2 <- Y^2 %*% W + + # Per example loss `L(V, X_i)` + L <- y2 - y1^2 + + # Vectorized Weights with forced symmetry + vecS <- (L[i] - (Y[j] - y1[i])^2) * W[lower] + vecS <- vecS + ((L[j] - (Y[i] - y1[j])^2) * W[upper]) + # Compute scaling of `X` row differences + vecS <- vecS * vecD + + G <- crossprod(X_diff, X_diff * vecS) %*% V + G <- (-2 / (n * h^2)) * G + return(G) +} +rStiefl <- function(p, q) { + return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q)))) +} + +n <- 200 +p <- 12 +q <- 10 + +X <- matrix(runif(n * p), n, p) +Y <- runif(n) +V <- rStiefl(p, q) +h <- 0.1 + +pair.index <- elem.pairs(seq(n)) +i <- pair.index[1, ] # `i` indices of `(i, j)` pairs +j <- pair.index[2, ] # `j` indices of `(i, j)` pairs +lower <- ((i - 1) * n) + j +upper <- ((j - 1) * n) + i +X_diff <- X[i, , drop = F] - X[j, , drop = F] + +stopifnot(all.equal( + grad(X, Y, V, h), + gradient.c(X, X_diff, Y, V, h) +)) +microbenchmark( + grad = grad(X, Y, V, h), + gradient.c = gradient.c(X, X_diff, Y, V, h) +) diff --git a/wip.c b/benchmark.c similarity index 100% rename from wip.c rename to benchmark.c diff --git a/wip.h b/benchmark.h similarity index 100% rename from wip.h rename to benchmark.h diff --git a/build_install.R b/build_install.R new file mode 100644 index 0000000..367da68 --- /dev/null +++ b/build_install.R @@ -0,0 +1,43 @@ +## Installing CVE (C implementation) +(setwd('~/Projects/CVE/CVE_C')) +# equiv to Rcpp::compileAttributes(). 
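+# Note: `pkgbuild::compile_dll()` compiles the package's native code up front
+# so that `devtools::document()` (which loads the package for roxygen2, see the
+# linked rstantools issue) finds the shared library; `build()` then produces
+# the source tarball installed via `install.packages(..., type = "source")`.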
+library(devtools) +pkgbuild::compile_dll() +document() # See bug: https://github.com/stan-dev/rstantools/issues/52 +pkgbuild::clean_dll() +(path <- build(vignettes = FALSE)) +install.packages(path, repos = NULL, type = "source") +library(CVE) + +## Installing CVEpureR +(setwd('~/Projects/CVE/CVE_R')) +library(devtools) +document() # See bug: https://github.com/stan-dev/rstantools/issues/52 +(path <- build(vignettes = FALSE)) +install.packages(path, repos = NULL, type = "source") +library(CVEpureR) + + +ds <- dataset("M1") +gc() +path <- '~/Projects/CVE/tmp/R.prof' +Rprof(path, append = F, line.profiling = T) +cve.res <- cve.call(ds$X, ds$Y, k = ncol(ds$B)) # , method = "linesearch" +Rprof(NULL) +(prof <- summaryRprof(path)) # , lines = "both")) +cve.res[[ncol(ds$B)]]$loss + +X <- ds$X +Y <- ds$Y +k <- ncol(ds$B) +system.time( + cve.res <- cve(Y ~ X, k = k) +) + +system.time( + cve.res <- cve(Y ~ X, k = k, method = "sgd", tau = 0.01, batch.size = 32L) +) + +system.time( + cve.res <- cve(Y ~ X, k = k, method = "linesearch") +) diff --git a/runtime_tests_grad.cpp b/runtime_tests_grad.cpp deleted file mode 100644 index 206c7c9..0000000 --- a/runtime_tests_grad.cpp +++ /dev/null @@ -1,394 +0,0 @@ -// -// Usage (bash): -// ~$ R -e "library(Rcpp); sourceCpp('runtime_tests_grad.cpp')" -// -// Usage (R): -// > library(Rcpp) -// > sourceCpp('runtime_tests_grad.cpp') -// - -// // #define ARMA_DONT_USE_WRAPPER -// #define ARMA_DONT_USE_BLAS - -// [[Rcpp::depends(RcppArmadillo)]] -#include -#include - -// [[Rcpp::export]] -arma::mat arma_grad(const arma::mat& X, - const arma::mat& X_diff, - const arma::vec& Y, - const arma::vec& Y_rep, - const arma::mat& V, - const double h) { - using namespace arma; - - uword n = X.n_rows; - uword p = X.n_cols; - - // orthogonal projection matrix `Q = I - VV'` for dist computation - mat Q = -(V * V.t()); - Q.diag() += 1; - // calc pairwise distances as `D` with `D(i, j) = d_i(V, X_j)` - vec D_vec = sum(square(X_diff * Q), 1); - mat D = reshape(D_vec, n, n); - // calc weights as `W` with `W(i, j) = w_i(V, X_j)` - mat W = exp(D / (-2.0 * h)); - W = normalise(W, 1); // colomn-wise, 1-norm - vec W_vec = vectorise(W); - // centered weighted `Y` means - vec y1 = W.t() * Y; - vec y2 = W.t() * square(Y); - // loss for each X_i, meaning `L(i) = L_n(V, X_i)` - vec L = y2 - square(y1); - // "global" loss - double loss = mean(L); - // `G = \nabla_V L_n(V)` a.k.a. gradient of `L` with respect to `V` - vec scale = (repelem(L, n, 1) - square(Y_rep - repelem(y1, n, 1))) % W_vec % D_vec; - mat X_diff_scale = X_diff.each_col() % scale; - mat G = X_diff_scale.t() * X_diff * V; - G *= -2.0 / (h * h * n); - - return G; -} - -// ATTENTION: assumed `X` stores X_i's column wise, `X = cbind(X_1, X_2, ..., X_n)` -// [[Rcpp::export]] -arma::mat grad(const arma::mat& X, - const arma::vec& Y, - const arma::mat& V, - const double h -) { - using namespace arma; - - // get dimensions - const uword n = X.n_cols; - const uword p = X.n_rows; - const uword q = V.n_cols; - - // compute orthogonal projection - mat Q = -(V * V.t()); - Q.diag() += 1.0; - - // distance matrix `D(i, j) = d_i(V, X_j)` - mat D(n, n, fill::zeros); - // weights matrix `W(i, j) = w_i(V, X_j)` - mat W(n, n, fill::ones); // exp(0) = 1 - - double mvm = 0.0; // Matrix Vector Mult. 
- double sos = 0.0; // Sum Of Squares - // double wcn = 0.0; // Weights Column Norm - for (uword j = 0; j + 1 < n; ++j) { - for (uword i = j + 1; i < n; ++i) { - sos = 0.0; - for (uword k = 0; k < p; ++k) { - mvm = 0.0; - for (uword l = 0; l < p; ++l) { - mvm += Q(k, l) * (X(l, i) - X(l, j)); - } - sos += mvm * mvm; - } - D(i, j) = D(j, i) = sos; - W(i, j) = W(j, i) = std::exp(sos / (-2. * h)); - } - } - - // column normalization of weights `W` - double col_sum; - for (uword j = 0; j < n; ++j) { - col_sum = 0.0; - for (uword i = 0; i < n; ++i) { - col_sum += W(i, j); - } - for (uword i = 0; i < n; ++i) { - W(i, j) /= col_sum; - } - } - - // weighted first, second order responce means `y1`, `y2` - // and component wise Loss `L(i) = L_n(V, X_i)` - vec y1(n); - vec y2(n); - vec L(n); - double tmp; - double loss = 0.0; - for (uword i = 0; i < n; ++i) { - mvm = 0.0; // Matrix Vector Mult. - sos = 0.0; // Sum Of Squared (weighted) - for (uword k = 0; k < n; ++k) { - mvm += (tmp = W(k, i) * Y(k)); - sos += tmp * Y(k); // W(k, i) * Y(k) * Y(k) - } - y1(i) = mvm; - y2(i) = sos; - loss += (L(i) = sos - (mvm * mvm)); // L_n(V, X_i) = y2(i) - y1(i)^2 - } - loss /= n; - - mat S(n, n); - for (uword k = 0; k < n; ++k) { - for (uword l = 0; l < n; ++l) { - tmp = Y(k) - y1(l); - S(k, l) = (L(l) - (tmp * tmp)) * W(k, l) * D(k, l); - } - } - - // gradient - mat G(p, q); - double factor = -2. / (n * h * h); - double gij; - for (uword j = 0; j < q; ++j) { - for (uword i = 0; i < p; ++i) { - gij = 0.0; - for (uword k = 0; k < n; ++k) { - for (uword l = 0; l < n; ++l) { - mvm = 0.0; - for (uword m = 0; m < p; ++m) { - mvm += (X(m, l) - X(m, k)) * V(m, j); - } - // gij += (S(k, l) + S(l, k)) * (X(i, l) - X(i, k)); - gij += S(k, l) * (X(i, l) - X(i, k)) * mvm; - } - } - G(i, j) = factor * gij; - } - } - - return G; -} - -// ATTENTION: assumed `X` stores X_i's column wise, `X = cbind(X_1, X_2, ..., X_n)` -// [[Rcpp::export]] -arma::mat grad_p(const arma::mat& X_ref, - const arma::vec& Y_ref, - const arma::mat& V_ref, - const double h -) { - using arma::uword; - - // get dimensions - const uword n = X_ref.n_cols; - const uword p = X_ref.n_rows; - const uword q = V_ref.n_cols; - - const double* X = X_ref.memptr(); - const double* Y = Y_ref.memptr(); - const double* V = V_ref.memptr(); - - // allocate memory for entire algorithm - // Q (p,p) D+W+S (n,n) y1+L (n) G (p,q) - uword memsize = (p * p) + (3 * n * n) + (2 * n) + (p * q); - double* mem = static_cast(malloc(sizeof(double) * memsize)); - - // assign pointer to memory associated memory area - double* Q = mem; - double* D = Q + (p * p); - double* W = D + (n * n); - double* S = W + (n * n); - double* y1 = S + (n * n); - double* L = y1 + n; - double* G = L + n; - - // compute orthogonal projection - double sum; - // compute orthogonal projection `Q = I_p - V V'` - for (uword j = 0; j < p; ++j) { - for (uword i = j; i < p; ++i) { - sum = 0.0; - for (uword k = 0; k < q; ++k) { - sum += V[k * p + i] * V[k * p + j]; - } - if (i == j) { - Q[j * p + i] = 1.0 - sum; - } else { - Q[j * p + i] = Q[i * p + j] = -sum; - } - } - } - - // set `diag(D) = 0` and `diag(W) = 1`. - for (uword i = 0; i < n * n; i += n + 1) { - D[i] = 0.0; - W[i] = 1.0; - } - // components (using symmetrie) of `D` and `W` (except `diag`) - double mvm = 0.0; // Matrix Vector Mult. 
- for (uword j = 0; j + 1 < n; ++j) { - for (uword i = j + 1; i < n; ++i) { - sum = 0.0; - for (uword k = 0; k < p; ++k) { - mvm = 0.0; - for (uword l = 0; l < p; ++l) { - mvm += Q[l * p + k] * (X[i * p + l] - X[j * p + l]); - } - sum += mvm * mvm; - } - D[j * n + i] = D[i * n + j] = sum; - W[j * n + i] = W[i * n + j] = std::exp(sum / (-2. * h)); - } - } - - // column normalization of weights `W` - for (uword j = 0; j < n; ++j) { - sum = 0.0; - for (uword i = 0; i < n; ++i) { - sum += W[j * n + i]; - } - for (uword i = 0; i < n; ++i) { - W[j * n + i] /= sum; - } - } - // weighted first, secend order responce means `y1`, `y2` - // and component wise Loss `L(i) = L_n(V, X_i)` - double tmp; - double loss = 0.0; - for (uword i = 0; i < n; ++i) { - mvm = 0.0; // Matrix Vector Mult. - sum = 0.0; // Sum Of (weighted) Squares - for (uword k = 0; k < n; ++k) { - mvm += (tmp = W[i * n + k] * Y[k]); - sum += tmp * Y[k]; - } - y1[i] = mvm; - loss += (L[i] = sum - (mvm * mvm)); - } - loss /= n; - - // scaling for gradient summation - // this scaling matrix is the lower triangular matrix defined as - // - // S_kl := (s_{kl} + s_{lk}) D_{kl} - // s_ij := (L_n(V, X_j) - (Y_i - y1(V, X_j))^2) W_ij - double factor; - for (uword l = 0; l < n; ++l) { - for (uword k = l + 1; k < n; ++k) { - tmp = Y[k] - y1[l]; - factor = (L[l] - (tmp * tmp)) * W[l * n + k]; // \tile{S}_{kl} - tmp = Y[l] - y1[k]; - factor += (L[k] - (tmp * tmp)) * W[k * n + l]; // \tile{S}_{lk} - S[l * n + k] = factor * D[l * n + k]; // (s_kl + s_lk) * D_kl - } - } - - // gradient - // reuse memory area of `Q` - // no longer needed and provides enough space (`q < p`) - double* GD = Q; - const double* X_l; - const double* X_k; - for (uword j = 0; j < p; ++j) { - for (uword i = 0; i < p; ++i) { - sum = 0.0; - for (uword l = 0; l < n; ++l) { - X_l = X + (l * p); - for (uword k = l + 1; k < n; ++k) { - X_k = X + (k * p); - sum += S[l * n + k] * (X_l[i] - X_k[i]) * (X_l[j] - X_k[j]); - } - } - GD[j * p + i] = sum; - } - } - - // distance gradient `DG` to gradient by multiplying with `V` - factor = -2. / (n * h * h); - for (uword i = 0; i < p; ++i) { - for (uword j = 0; j < q; ++j) { - sum = 0.0; - for (uword k = 0; k < p; ++k) { - sum += GD[k * p + i] * V[j * p + k]; - } - G[j * p + i] = factor * sum; - } - } - - // construct 'Armadillo' matrix from `G`s memory area - arma::mat Grad(G, p, q); - - // free entire allocated memory block - free(mem); - - return Grad; -} - - - -/*** R - -suppressMessages(library(microbenchmark)) - -cat("Start timing:\n") -time.start <- Sys.time() - -rStiefl <- function(p, q) { - return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q)))) -} - -## compare runtimes -n <- 200L -p <- 12L -q <- p - 2L -X <- matrix(rnorm(n * p), n, p) -Xt <- t(X) -X_diff <- kronecker(rep(1, n), X) - kronecker(X, rep(1, n)) -Y <- rnorm(n) -Y_rep <- kronecker(rep(1, n), Y) # repmat(Y, n, 1) -h <- 1. 
/ 4.; -V <- rStiefl(p, q) - -# A <- arma_grad(X, X_diff, Y, Y_rep, V, h) -# G1 <- grad(Xt, Y, V, h) -# G2 <- grad_p(Xt, Y, V, h) -# -# print(round(A[1:6, 1:6], 3)) -# print(round(G1[1:6, 1:6], 3)) -# print(round(G2[1:6, 1:6], 3)) -# print(round(abs(A - G1), 9)) -# print(round(abs(A - G2), 9)) -# -# q() - - -comp <- function (A, B, tol = sqrt(.Machine$double.eps)) { - max(abs(A - B)) < tol -} -comp.all <- function (res) { - if (length(res) < 2) { - return(TRUE) - } - res.one = res[[1]] - for (i in 2:length(res)) { - if (!comp(res.one, res[[i]])) { - return(FALSE) - } - } - return(TRUE) -} -counter <- 0 -setup.tests <- function () { - if ((counter %% 3) == 0) { - X <<- matrix(rnorm(n * p), n, p) - Xt <<- t(X) - X_diff <<- kronecker(rep(1, n), X) - kronecker(X, rep(1, n)) - Y <<- rnorm(n) - Y_rep <<- kronecker(rep(1, n), Y) # arma::repmat(Y, n, 1) - h <<- 1. / 4.; - V <<- rStiefl(p, q) - } - counter <<- counter + 1 -} -mbm <- microbenchmark( - arma = arma_grad(X, X_diff, Y, Y_rep, V, h), - # grad = grad(Xt, Y, V, h), - grad_p = grad_p(Xt, Y, V, h), - check = comp.all, - setup = setup.tests(), - times = 100L -) -cat("\033[1m\033[92mTotal time:", format(Sys.time() - time.start), '\n') -print(mbm) -cat("\033[0m") - -boxplot(mbm, las = 2, xlab = NULL) - -*/ diff --git a/test.R b/test.R new file mode 100644 index 0000000..5fa890b --- /dev/null +++ b/test.R @@ -0,0 +1,60 @@ +# library(CVEpureR) +# path <- '~/Projects/CVE/tmp/logger.R.pdf' + +library(CVE) +path <- '~/Projects/CVE/tmp/seeded_test.pdf' + +epochs <- 100 +attempts <- 25 + +# Define the logger for the `cve()` method. +logger <- function(env) { + # Note the `<<-` assignement! + loss.history[env$epoch + 1, env$attempt] <<- env$loss + error.history[env$epoch + 1, env$attempt] <<- env$error + tau.history[env$epoch + 1, env$attempt] <<- env$tau + # Compute true error by comparing to the true `B` + B.est <- null(env$V) # Function provided by CVE + P.est <- B.est %*% solve(t(B.est) %*% B.est) %*% t(B.est) + true.error <- norm(P - P.est, 'F') / sqrt(2 * k) + true.error.history[env$epoch + 1, env$attempt] <<- true.error +} + +pdf(path) +par(mfrow = c(2, 2)) + +for (name in paste0("M", seq(5))) { + # Seed random number generator + set.seed(42) + + # Create a dataset + ds <- dataset(name) + X <- ds$X + Y <- ds$Y + B <- ds$B # the true `B` + k <- ncol(ds$B) + # True projection matrix. + P <- B %*% solve(t(B) %*% B) %*% t(B) + + # Setup histories. 
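+    # Each history has `epochs + 1` rows because the logger above writes to
+    # row `env$epoch + 1` (epoch counting is assumed to start at 0), with one
+    # column per optimization attempt.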
+ loss.history <- matrix(NA, epochs + 1, attempts) + error.history <- matrix(NA, epochs + 1, attempts) + tau.history <- matrix(NA, epochs + 1, attempts) + true.error.history <- matrix(NA, epochs + 1, attempts) + + dr <- cve(Y ~ X, k = k, logger = logger, epochs = epochs, attempts = attempts) + + # Plot history's + matplot(loss.history, type = 'l', log = 'y', xlab = 'i (iteration)', + main = paste('loss', name), + ylab = expression(L(V[i]))) + matplot(true.error.history, type = 'l', log = 'y', xlab = 'i (iteration)', + main = paste('true error', name), + ylab = expression(group('|', B * B^T - B[i] * B[i]^T, '|')[F] / sqrt(2 * k))) + matplot(error.history, type = 'l', log = 'y', xlab = 'i (iteration)', + main = paste('error', name), + ylab = expression(group('|', V[i-1] * V[i-1]^T - V[i] * V[i]^T, '|')[F])) + matplot(tau.history, type = 'l', log = 'y', xlab = 'i (iteration)', + main = paste('learning rate', name), + ylab = expression(tau[i])) +} \ No newline at end of file diff --git a/validate.R b/validate.R deleted file mode 100644 index 3d7c8d8..0000000 --- a/validate.R +++ /dev/null @@ -1,104 +0,0 @@ -# -# Usage: -# ~$ Rscript validate.R - -# load MAVE package for comparison -library(MAVE) -# load (and compile) cve and dataset source -library(Rcpp) -cat("Compiling source 'cve_V1.cpp'\n") -Rcpp::sourceCpp('cve_V1.cpp', embeddedR = FALSE) -# load dataset sampler -source('CVE/R/datasets.R') - -# set default nr of simulations -nr.sim <- 25 - -#' Orthogonal projection to sub-space spanned by `B` -#' -#' @param B Matrix -#' @return Orthogonal Projection Matrix -proj <- function(B) { - B %*% solve(t(B) %*% B) %*% t(B) -} - -#' Compute nObs given dataset dimension \code{n}. -#' -#' @param n Number of samples -#' @return Numeric estimate of \code{nObs} -nObs <- function (n) { n^0.5 } - -# dataset names -dataset.names <- c("M1", "M2", "M3", "M4", "M5") # M4 not implemented jet - -## prepare "logging" -# result error, time, ... 
data.frame's -error <- matrix(nrow = nr.sim, ncol = 2 * length(dataset.names)) -time <- matrix(nrow = nr.sim, ncol = 2 * length(dataset.names)) -# convert to data.frames -error <- as.data.frame(error) -time <- as.data.frame(time) -# set names -names(error) <- kronecker(c("CVE.", "MAVE."), dataset.names, paste0) -names(time) <- kronecker(c("CVE.", "MAVE."), dataset.names, paste0) - -# get current time -start.time <- Sys.time() -## main comparison loop (iterate `nr.sim` times for each dataset) -for (i in seq_along(dataset.names)) { - for (j in 1:nr.sim) { - name <- dataset.names[i] - # reporting progress - cat("\rRunning Test (", name, j , "):", - (i - 1) * nr.sim + j, "/", length(dataset.names) * nr.sim, - " - Time since start:", format(Sys.time() - start.time), "\033[K") - # create new dataset - ds <- dataset(name) - k <- ncol(ds$B) # real dim - # call CVE - cve.time <- system.time( - cve.res <- cve_cpp(ds$X, ds$Y, - k = k, - nObs = nObs(nrow(ds$X)), - verbose = FALSE) - ) - # call MAVE - mave.time <- system.time( - mave.res <- mave(Y ~ ., - data = data.frame(X = ds$X, Y = ds$Y), - method = "meanMAVE") - ) - # compute real and approximated sub-space projections - P <- proj(ds$B) # real - P.cve <- proj(cve.res$B) - P.mave <- proj(mave.res$dir[[k]]) - # compute (and store) errors - error[j, paste0("CVE.", name)] <- norm(P - P.cve, 'F') / sqrt(2 * k) - error[j, paste0("MAVE.", name)] <- norm(P - P.mave, 'F') / sqrt(2 * k) - # store run-times - time[j, paste0("CVE.", name)] <- cve.time["elapsed"] - time[j, paste0("MAVE.", name)] <- mave.time["elapsed"] - } -} - -cat("\n\n## Time [sec] Means:\n") -print(colMeans(time)) -cat("\n## Error Means:\n") -print(colMeans(error)) - -len <- length(dataset.names) -pdf("plots/Rplots_validate.pdf") -boxplot(as.matrix(error), - main = paste0("Error (nr.sim = ", nr.sim, ")"), - ylab = expression(error == group("||", P[B] - P[hat(B)], "||")[F] / sqrt(2*k)), - las = 2, - at = c(1:len, 1:len + len + 1) -) -boxplot(as.matrix(time), - main = paste0("Time (nr.sim = ", nr.sim, ")"), - ylab = "time [sec]", - las = 2, - at = c(1:len, 1:len + len + 1) -) -cat("Plot saved to 'plots/Rplots_validate.pdf'\n") -suppressMessages(dev.off()) diff --git a/wip.R b/wip.R index 25eb66d..7648c37 100644 --- a/wip.R +++ b/wip.R @@ -1,273 +1,5 @@ library(microbenchmark) -dyn.load("wip.so") - - -## rowSum* .call -------------------------------------------------------------- -rowSums.c <- function(M) { - stopifnot( - is.matrix(M), - is.numeric(M) - ) - if (!is.double(M)) { - M <- matrix(as.double(M), nrow = nrow(M)) - } - - .Call('R_rowSums', PACKAGE = 'wip', M) -} -colSums.c <- function(M) { - stopifnot( - is.matrix(M), - is.numeric(M) - ) - if (!is.double(M)) { - M <- matrix(as.double(M), nrow = nrow(M)) - } - - .Call('R_colSums', PACKAGE = 'wip', M) -} -rowSquareSums.c <- function(M) { - stopifnot( - is.matrix(M), - is.numeric(M) - ) - if (!is.double(M)) { - M <- matrix(as.double(M), nrow = nrow(M)) - } - - .Call('R_rowSquareSums', PACKAGE = 'wip', M) -} -rowSumsSymVec.c <- function(vecA, nrow, diag = 0.0) { - stopifnot( - is.vector(vecA), - is.numeric(vecA), - is.numeric(diag), - nrow * (nrow - 1) == length(vecA) * 2 - ) - if (!is.double(vecA)) { - vecA <- as.double(vecA) - } - .Call('R_rowSumsSymVec', PACKAGE = 'wip', - vecA, as.integer(nrow), as.double(diag)) -} -rowSweep.c <- function(A, v, op = '-') { - stopifnot( - is.matrix(A), - is.numeric(v) - ) - if (!is.double(A)) { - A <- matrix(as.double(A), nrow = nrow(A)) - } - if (!is.vector(v) || !is.double(v)) { - v <- as.double(v) - 
} - stopifnot( - nrow(A) == length(v), - op %in% c('+', '-', '*', '/') - ) - - .Call('R_rowSweep', PACKAGE = 'wip', A, v, op) -} - -## row*, col* tests ------------------------------------------------------------ -n <- 3000 -M <- matrix(runif(n * 12), n, 12) -stopifnot( - all.equal(rowSums(M^2), rowSums.c(M^2)), - all.equal(colSums(M), colSums.c(M)) -) -microbenchmark( - rowSums = rowSums(M^2), - rowSums.c = rowSums.c(M^2), - rowSqSums.c = rowSquareSums.c(M) -) -microbenchmark( - colSums = colSums(M), - colSums.c = colSums.c(M) -) - -sum = rowSums(M) -stopifnot(all.equal( - sweep(M, 1, sum, FUN = `/`), - rowSweep.c(M, sum, '/') # Col-Normalize) -), all.equal( - sweep(M, 1, sum, FUN = `/`), - M / sum -)) -microbenchmark( - sweep = sweep(M, 1, sum, FUN = `/`), - M / sum, - rowSweep.c = rowSweep.c(M, sum, '/') # Col-Normalize) -) - -# Create symmetric matrix with constant diagonal entries. -nrow <- 200 -diag <- 1.0 -Sym <- tcrossprod(runif(nrow)) -diag(Sym) <- diag -# Get vectorized lower triangular part of `Sym` matrix. -SymVec <- Sym[lower.tri(Sym)] -stopifnot(all.equal( - rowSums(Sym), - rowSumsSymVec.c(SymVec, nrow, diag) -)) -microbenchmark( - rowSums = rowSums(Sym), - rowSums.c = rowSums.c(Sym), - rowSumsSymVec.c = rowSumsSymVec.c(SymVec, nrow, diag) -) - -## Matrix-Matrix opperation .call --------------------------------------------- -transpose.c <- function(A) { - stopifnot( - is.matrix(A), is.numeric(A) - ) - if (!is.double(A)) { - A <- matrix(as.double(A), nrow(A), ncol(A)) - } - - .Call('R_transpose', PACKAGE = 'wip', A) -} - -matrixprod.c <- function(A, B) { - stopifnot( - is.matrix(A), is.numeric(A), - is.matrix(B), is.numeric(B), - ncol(A) == nrow(B) - ) - if (!is.double(A)) { - A <- matrix(as.double(A), nrow = nrow(A)) - } - if (!is.double(B)) { - B <- matrix(as.double(B), nrow = nrow(B)) - } - - .Call('R_matrixprod', PACKAGE = 'wip', A, B) -} -crossprod.c <- function(A, B) { - stopifnot( - is.matrix(A), is.numeric(A), - is.matrix(B), is.numeric(B), - nrow(A) == nrow(B) - ) - if (!is.double(A)) { - A <- matrix(as.double(A), nrow = nrow(A)) - } - if (!is.double(B)) { - B <- matrix(as.double(B), nrow = nrow(B)) - } - - .Call('R_crossprod', PACKAGE = 'wip', A, B) -} -skewSymRank2k.c <- function(A, B, alpha = 1, beta = 0) { - stopifnot( - is.matrix(A), is.numeric(A), - is.matrix(B), is.numeric(B), - all(dim(A) == dim(B)), - is.numeric(alpha), length(alpha) == 1L, - is.numeric(beta), length(beta) == 1L - ) - if (!is.double(A)) { - A <- matrix(as.double(A), nrow = nrow(A)) - } - if (!is.double(B)) { - B <- matrix(as.double(B), nrow = nrow(B)) - } - - .Call('R_skewSymRank2k', PACKAGE = 'wip', A, B, - as.double(alpha), as.double(beta)) -} - -## Matrix-Matrix opperation tests --------------------------------------------- -n <- 200 -k <- 100 -m <- 300 - -A <- matrix(runif(n * k), n, k) -B <- matrix(runif(k * m), k, m) -stopifnot( - all.equal(t(A), transpose.c(A)) -) -microbenchmark( - t(A), - transpose.c(A) -) - -stopifnot( - all.equal(A %*% B, matrixprod.c(A, B)) -) -microbenchmark( - "%*%" = A %*% B, - matrixprod.c = matrixprod.c(A, B) -) - -A <- matrix(runif(k * n), k, n) -B <- matrix(runif(k * m), k, m) -stopifnot( - all.equal(crossprod(A, B), crossprod.c(A, B)) -) -microbenchmark( - crossprod = crossprod(A, B), - crossprod.c = crossprod.c(A, B) -) - -n <- 12 -k <- 11 -A <- matrix(runif(n * k), n, k) -B <- matrix(runif(n * k), n, k) -stopifnot(all.equal( - A %*% t(B) - B %*% t(A), skewSymRank2k.c(A, B) -)) -microbenchmark( - A %*% t(B) - B %*% t(A), - skewSymRank2k.c(A, B) -) - -## 
Orthogonal projection onto null space .Call -------------------------------- -nullProj.c <- function(B) { - stopifnot( - is.matrix(B), is.numeric(B) - ) - if (!is.double(B)) { - B <- matrix(as.double(B), nrow = nrow(B)) - } - - .Call('R_nullProj', PACKAGE = 'wip', B) -} -## Orthogonal projection onto null space tests -------------------------------- -p <- 12 -q <- 10 -V <- qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))) - -# Projection matrix onto `span(V)` -Q <- diag(1, p) - tcrossprod(V, V) -stopifnot( - all.equal(Q, nullProj.c(V)) -) -microbenchmark( - nullProj = diag(1, p) - tcrossprod(V, V), - nullProj.c = nullProj.c(V) -) - - -# ## WIP for gradient. ---------------------------------------------------------- - -gradient.c <- function(X, X_diff, Y, V, h) { - stopifnot( - is.matrix(X), is.double(X), - is.matrix(X_diff), is.double(X_diff), - ncol(X_diff) == ncol(X), nrow(X_diff) == nrow(X) * (nrow(X) - 1) / 2, - is.vector(Y) || (is.matrix(Y) && pmin(dim(Y)) == 1L), is.double(Y), - length(Y) == nrow(X), - is.matrix(V), is.double(V), - nrow(V) == ncol(X), - is.vector(h), is.numeric(h), length(h) == 1 - ) - - .Call('R_gradient', PACKAGE = 'wip', - X, X_diff, as.double(Y), V, as.double(h)); -} - elem.pairs <- function(elements) { # Number of elements to match. n <- length(elements) @@ -276,25 +8,20 @@ elem.pairs <- function(elements) { # Select unique combinations without self interaction. return(pairs[, pairs[1, ] < pairs[2, ]]) } + +rStiefl <- function(p, q) { + return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q)))) +} + grad <- function(X, Y, V, h, persistent = TRUE) { n <- nrow(X) p <- ncol(X) - if (!persistent) { - pair.index <- elem.pairs(seq(n)) - i <- pair.index[, 1] # `i` indices of `(i, j)` pairs - j <- pair.index[, 2] # `j` indices of `(i, j)` pairs - lower <- ((i - 1) * n) + j - upper <- ((j - 1) * n) + i - X_diff <- X[i, , drop = F] - X[j, , drop = F] - } - # Projection matrix onto `span(V)` Q <- diag(1, p) - tcrossprod(V, V) # Vectorized distance matrix `D`. vecD <- rowSums((X_diff %*% Q)^2) - # Weight matrix `W` (dnorm ... gaussean density function) W <- matrix(1, n, n) # `exp(0) == 1` W[lower] <- exp((-0.5 / h) * vecD^2) # Set lower tri. part @@ -304,7 +31,6 @@ grad <- function(X, Y, V, h, persistent = TRUE) { # Weighted `Y` momentums y1 <- Y %*% W # Result is 1D -> transposition irrelevant y2 <- Y^2 %*% W - # Per example loss `L(V, X_i)` L <- y2 - y1^2 @@ -318,8 +44,38 @@ grad <- function(X, Y, V, h, persistent = TRUE) { G <- (-2 / (n * h^2)) * G return(G) } -rStiefl <- function(p, q) { - return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q)))) + +grad2 <- function(X, Y, V, h, persistent = TRUE) { + n <- nrow(X) + p <- ncol(X) + + # Projection matrix onto `span(V)` + Q <- diag(1, p) - tcrossprod(V, V) + # Vectorized distance matrix `D`. + # vecD <- rowSums((X_diff %*% Q)^2) + vecD <- colSums(tcrossprod(Q, X_diff)^2) + + # Weight matrix `W` (dnorm ... gaussean density function) + K <- matrix(1, n, n) # `exp(0) == 1` + K[lower] <- exp((-0.5 / h) * vecD^2) # Set lower tri. part + K[upper] <- t(K)[upper] # Mirror lower tri. 
to upper + # W <- sweep(K, 2, colSums(K), FUN = `/`) # Col-Normalize + + # Weighted `Y` momentums + colSumsK <- colSums(K) + y1 <- (K %*% Y) / colSumsK + y2 <- (K %*% Y^2) / colSumsK + # Per example loss `L(V, X_i)` + L <- y2 - y1^2 + + tmp <- kronecker(matrix(y1, n, 1), matrix(Y, 1, n), `-`)^2 + tmp <- as.vector(L) - tmp + tmp <- tmp * K / colSumsK + vecS <- (tmp + t(tmp))[lower] * vecD + + G <- crossprod(X_diff, X_diff * vecS) %*% V + G <- (-2 / (n * h^2)) * G + return(G) } n <- 200 @@ -340,9 +96,9 @@ X_diff <- X[i, , drop = F] - X[j, , drop = F] stopifnot(all.equal( grad(X, Y, V, h), - gradient.c(X, X_diff, Y, V, h) + grad2(X, Y, V, h) )) microbenchmark( grad = grad(X, Y, V, h), - gradient.c = gradient.c(X, X_diff, Y, V, h) + grad2 = grad2(X, Y, V, h) )
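For reference, the sketch below is not part of the patch; it is a minimal standalone example (n = 4 chosen arbitrarily) of the pair-index bookkeeping that both `grad` and `grad2` in wip.R rely on when `persistent = TRUE`: `elem.pairs` enumerates all (i, j) pairs with i < j, and `lower` / `upper` are the corresponding column-major positions in the lower and upper triangle of the n x n weight matrix. All names are taken from wip.R; only the toy size and the fill values are invented for illustration.

# Minimal sketch, assuming the same definitions as in wip.R.
elem.pairs <- function(elements) {
    n <- length(elements)
    pairs <- rbind(rep(elements, each = n), rep(elements, n))
    pairs[, pairs[1, ] < pairs[2, ]]          # unique pairs, no self-pairs
}
n <- 4
pair.index <- elem.pairs(seq(n))
i <- pair.index[1, ]                          # smaller index of each (i, j) pair
j <- pair.index[2, ]                          # larger index of each (i, j) pair
lower <- ((i - 1) * n) + j                    # column-major positions, lower triangle
upper <- ((j - 1) * n) + i                    # mirrored positions, upper triangle
W <- matrix(0, n, n)
W[lower] <- seq_along(lower)                  # fill lower triangle pair by pair
W[upper] <- t(W)[upper]                       # mirror to the upper triangle
stopifnot(all(W == t(W)), all(diag(W) == 0))  # symmetric with zero diagonal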