parent
7d4d01a9a7
commit
47917fe0bd
|
@ -67,7 +67,17 @@ grad <- function(X, Y, V, h,
|
||||||
vecS <- vecS * vecD
|
vecS <- vecS * vecD
|
||||||
|
|
||||||
# The gradient.
|
# The gradient.
|
||||||
G <- crossprod(X_diff, sweep(X_diff %*% V, 1, vecS, `*`))
|
# 1. The `crossprod(A, B)` is equivalent to `t(A) %*% B`,
|
||||||
|
# 2. `(X_diff %*% V) * vecS` is first a marix matrix mult. and then using
|
||||||
|
# recycling to scale each row with the values of `vecS`.
|
||||||
|
# Note that `vecS` is a vector and that `R` uses column-major ordering
|
||||||
|
# of matrices.
|
||||||
|
# (See: notes for more details)
|
||||||
|
# TODO: Depending on n, p, q decide which version to take (for current
|
||||||
|
# datasets "inner" is faster, see: notes).
|
||||||
|
# inner = crossprod(X_diff, X_diff * vecS) %*% V,
|
||||||
|
# outer = crossprod(X_diff, (X_diff %*% V) * vecS)
|
||||||
|
G <- crossprod(X_diff, X_diff * vecS) %*% V
|
||||||
G <- (-2 / (n * h^2)) * G
|
G <- (-2 / (n * h^2)) * G
|
||||||
return(G)
|
return(G)
|
||||||
}
|
}
|
||||||
|
|
61
notes.md
61
notes.md
|
@ -33,9 +33,15 @@ install.packages(path, repos = NULL, type = "source")
|
||||||
The runtime tests (upcomming further tests) are creating log files saved in `tmp/`. These log files are `CSV` files (actualy `TSV`) with a header storing the test results. Depending on the test the files may contain differnt data. As an example we use the runtime test logs which store in each line the `dataset`, the used `method` as well as the `error` (actual error of estimated `B` against real `B`) and the `time`. For reading and analysing the data see the following example.
|
The runtime tests (upcomming further tests) are creating log files saved in `tmp/`. These log files are `CSV` files (actualy `TSV`) with a header storing the test results. Depending on the test the files may contain differnt data. As an example we use the runtime test logs which store in each line the `dataset`, the used `method` as well as the `error` (actual error of estimated `B` against real `B`) and the `time`. For reading and analysing the data see the following example.
|
||||||
```R
|
```R
|
||||||
# Load log as `data.frame`
|
# Load log as `data.frame`
|
||||||
test0 <- read.csv('tmp/test0.log', sep = '\t')
|
log <- read.csv('tmp/test0.log', sep = '\t')
|
||||||
# Create a error boxplot grouped by dataset.
|
# Create a error boxplot grouped by dataset.
|
||||||
boxplot(error ~ dataset, test0)
|
boxplot(error ~ dataset, log)
|
||||||
|
|
||||||
|
# Overview
|
||||||
|
for (ds.name in paste0('M', seq(5))) {
|
||||||
|
ds <- subset(log, dataset == ds.name, select = c('method', 'dataset', 'time', 'error'))
|
||||||
|
print(summary(ds))
|
||||||
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
## Environments and variable lookup.
|
## Environments and variable lookup.
|
||||||
|
@ -187,6 +193,57 @@ microbenchmark(
|
||||||
# tcrossprod(V, V) 1.003 1.1575 1.28451 1.2400 1.3685 2.742 100
|
# tcrossprod(V, V) 1.003 1.1575 1.28451 1.2400 1.3685 2.742 100
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Recycling vs. Sweep
|
||||||
|
```R
|
||||||
|
(n <- 200)
|
||||||
|
(p <- 12)
|
||||||
|
(q <- 10)
|
||||||
|
X_diff <- matrix(runif(n * (n - 1) / 2 * p), n * (n - 1) / 2, p)
|
||||||
|
V <- matrix(rnorm(p * q), p, q)
|
||||||
|
vecS <- runif(n * (n - 1) / 2)
|
||||||
|
|
||||||
|
stopifnot(
|
||||||
|
all.equal((X_diff %*% V) * rep(vecS, q),
|
||||||
|
sweep(X_diff %*% V, 1, vecS, `*`)),
|
||||||
|
all.equal((X_diff %*% V) * rep(vecS, q),
|
||||||
|
(X_diff %*% V) * vecS)
|
||||||
|
)
|
||||||
|
microbenchmark(
|
||||||
|
rep = (X_diff %*% V) * rep(vecS, q),
|
||||||
|
sweep = sweep(X_diff %*% V, 1, vecS, `*`, check.margin = FALSE),
|
||||||
|
recycle = (X_diff %*% V) * vecS
|
||||||
|
)
|
||||||
|
# Unit: microseconds
|
||||||
|
# expr min lq mean median uq max neval
|
||||||
|
# rep 851.723 988.3655 1575.639 1203.6385 1440.578 18999.23 100
|
||||||
|
# sweep 1313.177 1522.4010 2355.269 1879.2605 2065.399 18783.24 100
|
||||||
|
# recycle 719.001 786.1265 1157.285 881.8825 1163.202 19091.79 100
|
||||||
|
```
|
||||||
|
### Scaled `crossprod` with matmul order.
|
||||||
|
```R
|
||||||
|
(n <- 200)
|
||||||
|
(p <- 12)
|
||||||
|
(q <- 10)
|
||||||
|
X_diff <- matrix(runif(n * (n - 1) / 2 * p), n * (n - 1) / 2, p)
|
||||||
|
V <- matrix(rnorm(p * q), p, q)
|
||||||
|
vecS <- runif(n * (n - 1) / 2)
|
||||||
|
|
||||||
|
ref <- crossprod(X_diff, X_diff * vecS) %*% V
|
||||||
|
stopifnot(
|
||||||
|
all.equal(ref, crossprod(X_diff, (X_diff %*% V) * vecS)),
|
||||||
|
all.equal(ref, crossprod(X_diff, (X_diff %*% V) * vecS))
|
||||||
|
)
|
||||||
|
microbenchmark(
|
||||||
|
inner = crossprod(X_diff, X_diff * vecS) %*% V,
|
||||||
|
outer = crossprod(X_diff, (X_diff %*% V) * vecS)
|
||||||
|
)
|
||||||
|
# Unit: microseconds
|
||||||
|
# expr min lq mean median uq max neval
|
||||||
|
# inner 789.065 867.939 1683.812 987.9375 1290.055 16800.265 100
|
||||||
|
# outer 1141.479 1216.929 1404.702 1317.7315 1582.800 2531.766 100
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
## Using `Rprof()` for performance.
|
## Using `Rprof()` for performance.
|
||||||
The standart method for profiling where an algorithm is spending its time is with `Rprof()`.
|
The standart method for profiling where an algorithm is spending its time is with `Rprof()`.
|
||||||
```R
|
```R
|
||||||
|
|
Loading…
Reference in New Issue