parent 7155d0e9db
commit 1c120ec67c

@@ -37,7 +37,8 @@ cve_sgd <- function(X, Y, k,
         # Reset learning rate `tau`.
         tau <- tau.init
 
-        # Sample a starting basis from the Stiefl manifold.
+        # Sample a `(p, q)` dimensional matrix from the Stiefel manifold as
+        # optimization start value.
         V <- rStiefl(p, q)
 
         # Repeat `epochs` times
@@ -12,35 +12,36 @@ cve_simple <- function(X, Y, k,
                        epochs = 50L,
                        attempts = 10L
 ) {
-    # Addapt tolearance for break condition
-    tol <- sqrt(2 * k) * tol
-    tau.init <- tau # remember to reset for new attempt
+    # Set `grad` function's environment to enable it to find this environment's
+    # local variables, needed to enable the manipulation of these local
+    # variables from within `grad`.
+    environment(grad) <- environment()
+
+    # Setup loss history.
+    loss.history <- matrix(NA, epochs, attempts)
 
     # Get dimensions.
     n <- nrow(X)
     p <- ncol(X)
     q <- p - k
 
+    # Save initial learning rate `tau`.
+    tau.init <- tau
+    # Adapt tolerance for break condition.
+    tol <- sqrt(2 * q) * tol
+
     # Estimate bandwidth if not given.
     if (missing(h) | !is.numeric(h)) {
         h <- estimate.bandwidth(X, k, nObs)
     }
 
     # Compute all static data.
     X_diff <- row.pair.apply(X, `-`)
     index <- matrix(seq(n * n), n, n)
     tri.i <- row.pair.apply(index[, 1, drop = FALSE], function(i, j) { i })
     tri.j <- row.pair.apply(index[, 1, drop = FALSE], function(i, j) { j })
     lower.tri.ind <- index[lower.tri(index)]
     upper.tri.ind <- t(index)[lower.tri.ind] # ATTENTION: correct order
 
     I_p <- diag(1, p)
 
-    # Init variables for best attempt
-    loss.best <- Inf
+    # Init tracking of current best (according to multiple attempts).
+    V.best <- NULL
+    loss.best <- Inf
 
-    # Take a view attempts with different starting values
+    # Start loop for multiple attempts.
     for (attempt in 1:attempts) {
 
-        # reset step width `tau`
@@ -50,85 +51,63 @@ cve_simple <- function(X, Y, k,
         # optimization start value.
         V <- rStiefl(p, q)
 
-        ## Initial loss calculation
-        # Orthogonal projection to `span(V)`.
-        Q <- I_p - (V %*% t(V))
+        # Initial loss and gradient.
+        loss <- Inf
+        G <- grad(X, Y, V, h, loss.out = TRUE) # `loss.out=T` sets `loss`!
+        # Set last loss (i.e., the loss after applying the step).
+        loss.last <- loss
 
-        # Compute vectorized distance matrix `D`.
-        vecD <- rowSums((X_diff %*% Q)^2)
-        # Compute weights matrix `W`
-        W <- matrix(1, n, n) # init (`exp(0) = 1` in the diagonal)
-        W[lower.tri.ind] <- exp(vecD / (-2 * h)) # set lower triangular part
-        W[upper.tri.ind] <- t(W)[upper.tri.ind] # mirror to upper triangular part
-        W <- sweep(W, 2, colSums(W), FUN = `/`) # normalize columns
-
-        # Weighted `Y` momentums
-        y1 <- Y %*% W # is 1D anyway, avoid transposing `W`
-        y2 <- Y^2 %*% W
-
-        # Get per sample loss `L(V, X_i)`
-        L <- y2 - y1^2
-        # Sum to tolal loss `L(V)`
-        loss <- mean(L)
+        # Cayley transform matrix `A`
+        A <- (G %*% t(V)) - (V %*% t(G))
 
         ## Start optimization loop.
-        for (iter in 1:epochs) {
+        for (epoch in 1:epochs) {
+            # Apply learning rate `tau`.
+            A.tau <- tau * A
+            # Parallel transport (on Stiefel manifold) into direction of `G`.
+            V.tau <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)
 
-            # index according a lower triangular matrix stored in column major order
-            # by only the `i` or `j` index.
-            # vecW <- lower.tri.vector(W) + upper.tri.vector(W)
-            vecW <- W[lower.tri.ind] + W[upper.tri.ind]
-            S <- (L[tri.j] - (Y[tri.i] - y1[tri.j])^2) * vecW * vecD
+            # Loss at position after a step.
+            loss <- grad(X, Y, V.tau, h, loss.only = TRUE)
 
-            # Gradient
-            G <- t(X_diff) %*% sweep(X_diff %*% V, 1, S, `*`);
-            G <- (-2 / (n * h^2)) * G
+            # Check if step is appropriate
+            if ((loss - loss.last) > slack * loss.last) {
+                tau <- tau / 2
+                next() # Keep position and try with smaller `tau`.
+            }
+
+            # Compute error.
+            error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
+            # Check break condition (epoch check to skip ignored gradient calc).
+            # Note: the division by `sqrt(2 * k)` is included in `tol`.
+            if (error < tol | epoch >= epochs) {
+                # take last step and stop optimization.
+                V <- V.tau
+                break()
+            }
+
+            # Perform the step and remember previous loss.
+            V <- V.tau
+            loss.last <- loss
+
+            # Compute gradient at new position.
+            # Note: `loss` will be updated too!
+            G <- grad(X, Y, V, h, loss.out = TRUE, loss.log = TRUE)
+
+            # Cayley transform matrix `A`
+            A <- (G %*% t(V)) - (V %*% t(G))
 
-            # Compute next `V` by step size `tau` unsing the Cayley transform
-            # via a parallel transport into the gradient direction.
-            A.tau <- tau * A
-            V.tau <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)
-
-            # Orthogonal projection to `span(V.tau)`.
-            Q <- I_p - (V.tau %*% t(V.tau))
-
-            # Compute vectorized distance matrix `D`.
-            vecD <- rowSums((X_diff %*% Q)^2)
-            # Compute weights matrix `W` (only update values, diag keeps 1's)
-            W[lower.tri.ind] <- exp(vecD / (-2 * h)) # set lower triangular part
-            W[upper.tri.ind] <- t(W)[upper.tri.ind] # mirror to upper triangular part
-            W <- sweep(W, 2, colSums(W), FUN = `/`) # normalize columns
-
-            # Weighted `Y` momentums
-            y1 <- Y %*% W # is 1D anyway, avoid transposing `W`
-            y2 <- Y^2 %*% W
-
-            # Get per sample loss `L(V, X_i)`
-            L <- y2 - y1^2
-            # Sum to tolal loss `L(V)`
-            loss.tau <- mean(L)
-
-            # Check if step is appropriate
-            if (loss != Inf & loss.tau - loss > slack * loss) {
-                tau <- tau / 2
-            } else {
-                loss <- loss.tau
-                V <- V.tau
-            }
         }
 
         # Check if current attempt improved previous ones
-        if (loss.tau < loss.best) {
-            loss.best <- loss.tau
-            V.best <- V.tau
+        if (loss < loss.best) {
+            loss.best <- loss
+            V.best <- V
         }
 
     }
 
     return(list(
+        loss.history = loss.history,
         loss = loss.best,
         V = V.best,
         B = null(V.best),
@@ -8,7 +8,7 @@
 #' value loss is returned and \code{envir} is ignored.
 #' @keywords internal
 #' @export
-grad <- function(X, Y, V, h, loss.only = FALSE, loss.out = FALSE) {
+grad <- function(X, Y, V, h, loss.out = FALSE, loss.log = FALSE, loss.only = FALSE) {
     # Get number of samples and dimension.
     n <- nrow(X)
     p <- ncol(X)
@@ -23,12 +23,12 @@ grad <- function(X, Y, V, h, loss.only = FALSE, loss.out = FALSE) {
     lower <- index[lower.tri(index)]
     upper <- t(index)[lower]
 
-    # Create all pairewise differences of rows of `X`.
-    X_diff <- X[i, , drop = F] - X[j, , drop = F]
-
     # Projection matrix onto `span(V)`
     Q <- diag(1, p) - (V %*% t(V))
 
+    # Create all pairwise differences of rows of `X`.
+    X_diff <- X[i, , drop = F] - X[j, , drop = F]
+
     # Vectorized distance matrix `D`.
     vecD <- rowSums((X_diff %*% Q)^2)
@@ -36,7 +36,7 @@ grad <- function(X, Y, V, h, loss.only = FALSE, loss.out = FALSE) {
     W <- matrix(dnorm(0), n, n)
     W[lower] <- dnorm(vecD / h) # Set lower tri. part
     W[upper] <- t(W)[upper] # Mirror lower tri. to upper
-    W <- sweep(W, 2, colSums(W), FUN=`/`) # Col-Normalize
+    W <- sweep(W, 2, colSums(W), FUN = `/`) # Col-Normalize
 
     # Weighted `Y` momentums
     y1 <- Y %*% W # Result is 1D -> transposition irrelevant
@@ -44,12 +44,19 @@ grad <- function(X, Y, V, h, loss.only = FALSE, loss.out = FALSE) {
 
     # Per example loss `L(V, X_i)`
     L <- y2 - y1^2
-    if (loss.only) {
-        # Mean for total loss `L(V)`.
-        return(mean(L))
-    } else if (loss.out) {
-        # Bubble environments up and write to loss variable, aka out param.
-        loss <<- mean(L)
+    if (loss.out | loss.log | loss.only) {
+        meanL <- mean(L)
+        if (loss.out) {
+            # Bubble environments up and write to loss variable, aka out param.
+            loss <<- meanL
+        }
+        if (loss.log) {
+            loss.history[epoch, attempt] <<- meanL
+        }
+        if (loss.only) {
+            # Mean for total loss `L(V)`.
+            return(meanL)
+        }
     }
 
     # Vectorized Weights with forced symmetry
@@ -4,7 +4,8 @@
 \alias{grad}
 \title{Compute the gradient of `L(V)` given a dataset `X`.}
 \usage{
-grad(X, Y, V, h, loss.only = FALSE, loss.out = FALSE)
+grad(X, Y, V, h, loss.out = FALSE, loss.log = FALSE,
+  loss.only = FALSE)
 }
 \arguments{
 \item{X}{Data matrix.}
@@ -0,0 +1,113 @@
## Build and install.

To build the package the `devtools` package is used. This also provides `roxygen2`, which is used for the documentation and the automatic creation of the `NAMESPACE` file.

```R
setwd("./CVE_R") # Set path to the package root.
library(devtools) # Load required `devtools` package.
document() # Create `.Rd` files and write `NAMESPACE`.
```

Next the package needs to be built; for a pure `R` package (i.e. no `C/C++`, `Fortran`, ... code) the following is sufficient.

```bash
R CMD build CVE_R
R CMD INSTALL CVE_0.1.tar.gz
```

Then we are ready to use the package.

```R
library(CVE)
help(package = "CVE")
```

## Build and install from within `R`.

An alternative approach is the following.

```R
setwd('./CVE_R')
getwd()

library(devtools)
document()
# No vignettes to build but "inst/doc/" is required!
(path <- build(vignettes = FALSE))
install.packages(path, repos = NULL, type = "source")
```

**Note: I only recommend this approach during development.**

## Reading log files.

The runtime tests (and upcoming further tests) create log files saved in `tmp/`. These log files are `CSV` files (actually `TSV`) with a header, storing the test results. Depending on the test, the files may contain different data. As an example we use the runtime test logs, which store in each line the `dataset`, the used `method` as well as the `error` (the actual error of the estimated `B` against the real `B`) and the `time`. For reading and analyzing the data see the following example.

```R
# Load log as `data.frame`
test0 <- read.csv('tmp/test0.log', sep = '\t')
# Create an error boxplot grouped by dataset.
boxplot(error ~ dataset, test0)
```
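
Because the log is a plain `data.frame`, runtime comparisons work the same way. A minimal sketch (assuming the `method` and `time` columns described above; the column names are taken from the prose, not verified against the test scripts):

```R
# Median runtime per method (hypothetical columns `method` and `time`).
aggregate(time ~ method, test0, median)
```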

## Environments and variable lookup.

In the following are a few simple examples of how `R` searches for variables.
In addition we manipulate function closures to alter the search path of the variable lookup and to manipulate variables of an outer scope.
```R
droids <- "These aren't the droids you're looking for."

search <- function() {
    print(droids)
}

trooper.seeks <- function() {
    droids <- c("R2-D2", "C-3PO")
    search()
}

jedi.seeks <- function() {
    droids <- c("R2-D2", "C-3PO")
    environment(search) <- environment()
    search()
}

trooper.seeks()
jedi.seeks()
```
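
Note the difference: `trooper.seeks` prints the global `droids` message because `search` still carries its original (global) closure, while `jedi.seeks` rebinds `environment(search)` to its own frame, so the lookup finds the local `droids` vector.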

The next example illustrates how to write (without creating local copies) to variables outside of the function's local environment.

```R
counting <- function() {
    count <<- count + 1 # Note the `<<-` assignment.
}

(function() {
    environment(counting) <- environment()
    count <- 0

    for (i in 1:10) {
        counting()
    }

    return(count)
})()

(function() {
    closure <- new.env()
    environment(counting) <- closure
    assign("count", 0, envir = closure)

    for (i in 1:10) {
        counting()
    }

    return(closure$count)
})()
```
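
Both anonymous functions return `10`: `<<-` skips the local frame of `counting` and assigns to `count` where it is found along the enclosing environments, which is exactly the environment rebound via `environment(counting) <-`. The second variant differs only in keeping that environment as an explicit handle (`closure`) instead of reusing the caller's own frame.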

Another example of the usage of `do.call`, illustrating where the parameters are evaluated (example taken, and altered, from `?do.call`).

```R
## examples of where objects will be found.
A <- "A.Global"
f <- function(x) print(paste("f.new", x))
env <- new.env()
assign("A", "A.new", envir = env)
assign("f", f, envir = env)
f <- function(x) print(paste("f.Global", x))
f(A)                                          # f.Global A.Global
do.call("f", list(A))                         # f.Global A.Global
do.call("f", list(A), envir = env)            # f.new A.Global
do.call(f, list(A), envir = env)              # f.Global A.Global
do.call("f", list(quote(A)), envir = env)     # f.new A.new
do.call(f, list(quote(A)), envir = env)       # f.Global A.new
do.call("f", list(as.name("A")), envir = env) # f.new A.new
```
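
The pattern: `envir` determines where a function given as a *string* and any unevaluated symbols (`quote(A)`, `as.name("A")`) are resolved, while arguments already evaluated in the calling frame (`f`, `A`) keep their global values.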