2
0
Fork 0

add: to notes,

fix: bottleneck in grad
This commit is contained in:
Daniel Kapla 2019-09-03 20:43:34 +02:00
parent 7d4d01a9a7
commit 47917fe0bd
2 changed files with 70 additions and 3 deletions

View File

@ -67,7 +67,17 @@ grad <- function(X, Y, V, h,
vecS <- vecS * vecD
# The gradient.
G <- crossprod(X_diff, sweep(X_diff %*% V, 1, vecS, `*`))
# 1. The `crossprod(A, B)` is equivalent to `t(A) %*% B`,
# 2. `(X_diff %*% V) * vecS` is first a marix matrix mult. and then using
# recycling to scale each row with the values of `vecS`.
# Note that `vecS` is a vector and that `R` uses column-major ordering
# of matrices.
# (See: notes for more details)
# TODO: Depending on n, p, q decide which version to take (for current
# datasets "inner" is faster, see: notes).
# inner = crossprod(X_diff, X_diff * vecS) %*% V,
# outer = crossprod(X_diff, (X_diff %*% V) * vecS)
G <- crossprod(X_diff, X_diff * vecS) %*% V
G <- (-2 / (n * h^2)) * G
return(G)
}

View File

@ -33,9 +33,15 @@ install.packages(path, repos = NULL, type = "source")
The runtime tests (upcoming further tests) are creating log files saved in `tmp/`. These log files are `CSV` files (actually `TSV`) with a header storing the test results. Depending on the test the files may contain different data. As an example we use the runtime test logs which store in each line the `dataset`, the used `method` as well as the `error` (actual error of estimated `B` against real `B`) and the `time`. For reading and analysing the data see the following example.
```R
# Load log as `data.frame`
test0 <- read.csv('tmp/test0.log', sep = '\t')
log <- read.csv('tmp/test0.log', sep = '\t')
# Create a error boxplot grouped by dataset.
boxplot(error ~ dataset, test0)
boxplot(error ~ dataset, log)
# Overview
for (ds.name in paste0('M', seq(5))) {
ds <- subset(log, dataset == ds.name, select = c('method', 'dataset', 'time', 'error'))
print(summary(ds))
}
```
## Environments and variable lookup.
@ -187,6 +193,57 @@ microbenchmark(
# tcrossprod(V, V) 1.003 1.1575 1.28451 1.2400 1.3685 2.742 100
```
### Recycling vs. Sweep
```R
(n <- 200)
(p <- 12)
(q <- 10)
X_diff <- matrix(runif(n * (n - 1) / 2 * p), n * (n - 1) / 2, p)
V <- matrix(rnorm(p * q), p, q)
vecS <- runif(n * (n - 1) / 2)
stopifnot(
all.equal((X_diff %*% V) * rep(vecS, q),
sweep(X_diff %*% V, 1, vecS, `*`)),
all.equal((X_diff %*% V) * rep(vecS, q),
(X_diff %*% V) * vecS)
)
microbenchmark(
rep = (X_diff %*% V) * rep(vecS, q),
sweep = sweep(X_diff %*% V, 1, vecS, `*`, check.margin = FALSE),
recycle = (X_diff %*% V) * vecS
)
# Unit: microseconds
# expr min lq mean median uq max neval
# rep 851.723 988.3655 1575.639 1203.6385 1440.578 18999.23 100
# sweep 1313.177 1522.4010 2355.269 1879.2605 2065.399 18783.24 100
# recycle 719.001 786.1265 1157.285 881.8825 1163.202 19091.79 100
```
### Scaled `crossprod` with matmul order.
```R
(n <- 200)
(p <- 12)
(q <- 10)
X_diff <- matrix(runif(n * (n - 1) / 2 * p), n * (n - 1) / 2, p)
V <- matrix(rnorm(p * q), p, q)
vecS <- runif(n * (n - 1) / 2)
ref <- crossprod(X_diff, X_diff * vecS) %*% V
stopifnot(
    all.equal(ref, crossprod(X_diff, (X_diff %*% V) * vecS))
)
microbenchmark(
inner = crossprod(X_diff, X_diff * vecS) %*% V,
outer = crossprod(X_diff, (X_diff %*% V) * vecS)
)
# Unit: microseconds
# expr min lq mean median uq max neval
# inner 789.065 867.939 1683.812 987.9375 1290.055 16800.265 100
# outer 1141.479 1216.929 1404.702 1317.7315 1582.800 2531.766 100
```
## Using `Rprof()` for performance.
The standard method for profiling where an algorithm is spending its time is with `Rprof()`.
```R