parent
7155d0e9db
commit
1c120ec67c
|
@ -37,7 +37,8 @@ cve_sgd <- function(X, Y, k,
|
||||||
# Reset learning rate `tau`.
|
# Reset learning rate `tau`.
|
||||||
tau <- tau.init
|
tau <- tau.init
|
||||||
|
|
||||||
# Sample a starting basis from the Stiefl manifold.
|
# Sample a `(p, q)` dimensional matrix from the stiefel manifold as
|
||||||
|
# optimization start value.
|
||||||
V <- rStiefl(p, q)
|
V <- rStiefl(p, q)
|
||||||
|
|
||||||
# Repeat `epochs` times
|
# Repeat `epochs` times
|
||||||
|
|
|
@ -12,35 +12,36 @@ cve_simple <- function(X, Y, k,
|
||||||
epochs = 50L,
|
epochs = 50L,
|
||||||
attempts = 10L
|
attempts = 10L
|
||||||
) {
|
) {
|
||||||
# Addapt tolearance for break condition
|
# Set `grad` functions environment to enable if to find this environments
|
||||||
tol <- sqrt(2 * k) * tol
|
# local variabels, needed to enable the manipulation of this local variables
|
||||||
tau.init <- tau # remember to reset for new attempt
|
# from within `grad`.
|
||||||
|
environment(grad) <- environment()
|
||||||
|
|
||||||
|
# Setup loss histroy.
|
||||||
|
loss.history <- matrix(NA, epochs, attempts);
|
||||||
|
|
||||||
# Get dimensions.
|
# Get dimensions.
|
||||||
n <- nrow(X)
|
n <- nrow(X)
|
||||||
p <- ncol(X)
|
p <- ncol(X)
|
||||||
q <- p - k
|
q <- p - k
|
||||||
|
|
||||||
|
# Save initial learning rate `tau`.
|
||||||
|
tau.init <- tau
|
||||||
|
# Addapt tolearance for break condition.
|
||||||
|
tol <- sqrt(2 * q) * tol
|
||||||
|
|
||||||
# Estaimate bandwidth if not given.
|
# Estaimate bandwidth if not given.
|
||||||
if (missing(h) | !is.numeric(h)) {
|
if (missing(h) | !is.numeric(h)) {
|
||||||
h <- estimate.bandwidth(X, k, nObs)
|
h <- estimate.bandwidth(X, k, nObs)
|
||||||
}
|
}
|
||||||
|
|
||||||
# Compue all static data.
|
|
||||||
X_diff <- row.pair.apply(X, `-`)
|
|
||||||
index <- matrix(seq(n * n), n, n)
|
|
||||||
tri.i <- row.pair.apply(index[, 1, drop = FALSE], function(i, j) { i })
|
|
||||||
tri.j <- row.pair.apply(index[, 1, drop = FALSE], function(i, j) { j })
|
|
||||||
lower.tri.ind <- index[lower.tri(index)]
|
|
||||||
upper.tri.ind <- t(index)[lower.tri.ind] # ATTENTION: corret order
|
|
||||||
|
|
||||||
I_p <- diag(1, p)
|
I_p <- diag(1, p)
|
||||||
|
|
||||||
# Init variables for best attempt
|
# Init tracking of current best (according multiple attempts).
|
||||||
loss.best <- Inf
|
|
||||||
V.best <- NULL
|
V.best <- NULL
|
||||||
|
loss.best <- Inf
|
||||||
|
|
||||||
# Take a view attempts with different starting values
|
# Start loop for multiple attempts.
|
||||||
for (attempt in 1:attempts) {
|
for (attempt in 1:attempts) {
|
||||||
|
|
||||||
# reset step width `tau`
|
# reset step width `tau`
|
||||||
|
@ -50,85 +51,63 @@ cve_simple <- function(X, Y, k,
|
||||||
# optimization start value.
|
# optimization start value.
|
||||||
V <- rStiefl(p, q)
|
V <- rStiefl(p, q)
|
||||||
|
|
||||||
## Initial loss calculation
|
# Initial loss and gradient.
|
||||||
# Orthogonal projection to `span(V)`.
|
loss <- Inf
|
||||||
Q <- I_p - (V %*% t(V))
|
G <- grad(X, Y, V, h, loss.out = TRUE) # `loss.out=T` sets `loss`!
|
||||||
|
# Set last loss (aka, loss after applying the step).
|
||||||
# Compute vectorized distance matrix `D`.
|
loss.last <- loss
|
||||||
vecD <- rowSums((X_diff %*% Q)^2)
|
|
||||||
# Compute weights matrix `W`
|
|
||||||
W <- matrix(1, n, n) # init (`exp(0) = 1` in the diagonal)
|
|
||||||
W[lower.tri.ind] <- exp(vecD / (-2 * h)) # set lower triangular part
|
|
||||||
W[upper.tri.ind] <- t(W)[upper.tri.ind] # mirror to upper triangular part
|
|
||||||
W <- sweep(W, 2, colSums(W), FUN = `/`) # normalize columns
|
|
||||||
|
|
||||||
# Weighted `Y` momentums
|
|
||||||
y1 <- Y %*% W # is 1D anyway, avoid transposing `W`
|
|
||||||
y2 <- Y^2 %*% W
|
|
||||||
|
|
||||||
# Get per sample loss `L(V, X_i)`
|
|
||||||
L <- y2 - y1^2
|
|
||||||
# Sum to tolal loss `L(V)`
|
|
||||||
loss <- mean(L)
|
|
||||||
|
|
||||||
## Start optimization loop.
|
|
||||||
for (iter in 1:epochs) {
|
|
||||||
|
|
||||||
# index according a lower triangular matrix stored in column major order
|
|
||||||
# by only the `i` or `j` index.
|
|
||||||
# vecW <- lower.tri.vector(W) + upper.tri.vector(W)
|
|
||||||
vecW <- W[lower.tri.ind] + W[upper.tri.ind]
|
|
||||||
S <- (L[tri.j] - (Y[tri.i] - y1[tri.j])^2) * vecW * vecD
|
|
||||||
|
|
||||||
# Gradient
|
|
||||||
G <- t(X_diff) %*% sweep(X_diff %*% V, 1, S, `*`);
|
|
||||||
G <- (-2 / (n * h^2)) * G
|
|
||||||
|
|
||||||
# Cayley transform matrix `A`
|
# Cayley transform matrix `A`
|
||||||
A <- (G %*% t(V)) - (V %*% t(G))
|
A <- (G %*% t(V)) - (V %*% t(G))
|
||||||
|
|
||||||
# Compute next `V` by step size `tau` unsing the Cayley transform
|
## Start optimization loop.
|
||||||
# via a parallel transport into the gradient direction.
|
for (epoch in 1:epochs) {
|
||||||
|
# Apply learning rate `tau`.
|
||||||
A.tau <- tau * A
|
A.tau <- tau * A
|
||||||
|
# Parallet transport (on Stiefl manifold) into direction of `G`.
|
||||||
V.tau <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)
|
V.tau <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)
|
||||||
|
|
||||||
# Orthogonal projection to `span(V.tau)`.
|
# Loss at position after a step.
|
||||||
Q <- I_p - (V.tau %*% t(V.tau))
|
loss <- grad(X, Y, V.tau, h, loss.only = TRUE)
|
||||||
|
|
||||||
# Compute vectorized distance matrix `D`.
|
|
||||||
vecD <- rowSums((X_diff %*% Q)^2)
|
|
||||||
# Compute weights matrix `W` (only update values, diag keeps 1's)
|
|
||||||
W[lower.tri.ind] <- exp(vecD / (-2 * h)) # set lower triangular part
|
|
||||||
W[upper.tri.ind] <- t(W)[upper.tri.ind] # mirror to upper triangular part
|
|
||||||
W <- sweep(W, 2, colSums(W), FUN = `/`) # normalize columns
|
|
||||||
|
|
||||||
# Weighted `Y` momentums
|
|
||||||
y1 <- Y %*% W # is 1D anyway, avoid transposing `W`
|
|
||||||
y2 <- Y^2 %*% W
|
|
||||||
|
|
||||||
# Get per sample loss `L(V, X_i)`
|
|
||||||
L <- y2 - y1^2
|
|
||||||
# Sum to tolal loss `L(V)`
|
|
||||||
loss.tau <- mean(L)
|
|
||||||
|
|
||||||
# Check if step is appropriate
|
# Check if step is appropriate
|
||||||
if (loss != Inf & loss.tau - loss > slack * loss) {
|
if ((loss - loss.last) > slack * loss.last) {
|
||||||
tau <- tau / 2
|
tau <- tau / 2
|
||||||
} else {
|
next() # Keep position and try with smaller `tau`.
|
||||||
loss <- loss.tau
|
|
||||||
V <- V.tau
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Compute error.
|
||||||
|
error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
|
||||||
|
# Check break condition (epoch check to skip ignored gradient calc).
|
||||||
|
# Note: the devision by `sqrt(2 * k)` is included in `tol`.
|
||||||
|
if (error < tol | epoch >= epochs) {
|
||||||
|
# take last step and stop optimization.
|
||||||
|
V <- V.tau
|
||||||
|
break()
|
||||||
|
}
|
||||||
|
|
||||||
|
# Perform the step and remember previous loss.
|
||||||
|
V <- V.tau
|
||||||
|
loss.last <- loss
|
||||||
|
|
||||||
|
# Compute gradient at new position.
|
||||||
|
# Note: `loss` will be updated too!
|
||||||
|
G <- grad(X, Y, V, h, loss.out = TRUE, loss.log = TRUE)
|
||||||
|
|
||||||
|
# Cayley transform matrix `A`
|
||||||
|
A <- (G %*% t(V)) - (V %*% t(G))
|
||||||
}
|
}
|
||||||
|
|
||||||
# Check if current attempt improved previous ones
|
# Check if current attempt improved previous ones
|
||||||
if (loss.tau < loss.best) {
|
if (loss < loss.best) {
|
||||||
loss.best <- loss.tau
|
loss.best <- loss
|
||||||
V.best <- V.tau
|
V.best <- V
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return(list(
|
return(list(
|
||||||
|
loss.history = loss.history,
|
||||||
loss = loss.best,
|
loss = loss.best,
|
||||||
V = V.best,
|
V = V.best,
|
||||||
B = null(V.best),
|
B = null(V.best),
|
||||||
|
|
|
@ -8,7 +8,7 @@
|
||||||
#' value loss is returned and \code{envir} is ignored.
|
#' value loss is returned and \code{envir} is ignored.
|
||||||
#' @keywords internal
|
#' @keywords internal
|
||||||
#' @export
|
#' @export
|
||||||
grad <- function(X, Y, V, h, loss.only = FALSE, loss.out = FALSE) {
|
grad <- function(X, Y, V, h, loss.out = FALSE, loss.log = FALSE, loss.only = FALSE) {
|
||||||
# Get number of samples and dimension.
|
# Get number of samples and dimension.
|
||||||
n <- nrow(X)
|
n <- nrow(X)
|
||||||
p <- ncol(X)
|
p <- ncol(X)
|
||||||
|
@ -23,12 +23,12 @@ grad <- function(X, Y, V, h, loss.only = FALSE, loss.out = FALSE) {
|
||||||
lower <- index[lower.tri(index)]
|
lower <- index[lower.tri(index)]
|
||||||
upper <- t(index)[lower]
|
upper <- t(index)[lower]
|
||||||
|
|
||||||
|
# Create all pairewise differences of rows of `X`.
|
||||||
|
X_diff <- X[i, , drop = F] - X[j, , drop = F]
|
||||||
|
|
||||||
# Projection matrix onto `span(V)`
|
# Projection matrix onto `span(V)`
|
||||||
Q <- diag(1, p) - (V %*% t(V))
|
Q <- diag(1, p) - (V %*% t(V))
|
||||||
|
|
||||||
# Create all pairewise differences of rows of `X`.
|
|
||||||
X_diff <- X[i, , drop=F] - X[j, , drop=F]
|
|
||||||
|
|
||||||
# Vectorized distance matrix `D`.
|
# Vectorized distance matrix `D`.
|
||||||
vecD <- rowSums((X_diff %*% Q)^2)
|
vecD <- rowSums((X_diff %*% Q)^2)
|
||||||
|
|
||||||
|
@ -36,7 +36,7 @@ grad <- function(X, Y, V, h, loss.only = FALSE, loss.out = FALSE) {
|
||||||
W <- matrix(dnorm(0), n, n)
|
W <- matrix(dnorm(0), n, n)
|
||||||
W[lower] <- dnorm(vecD / h) # Set lower tri. part
|
W[lower] <- dnorm(vecD / h) # Set lower tri. part
|
||||||
W[upper] <- t(W)[upper] # Mirror lower tri. to upper
|
W[upper] <- t(W)[upper] # Mirror lower tri. to upper
|
||||||
W <- sweep(W, 2, colSums(W), FUN=`/`) # Col-Normalize
|
W <- sweep(W, 2, colSums(W), FUN = `/`) # Col-Normalize
|
||||||
|
|
||||||
# Weighted `Y` momentums
|
# Weighted `Y` momentums
|
||||||
y1 <- Y %*% W # Result is 1D -> transposition irrelevant
|
y1 <- Y %*% W # Result is 1D -> transposition irrelevant
|
||||||
|
@ -44,12 +44,19 @@ grad <- function(X, Y, V, h, loss.only = FALSE, loss.out = FALSE) {
|
||||||
|
|
||||||
# Per example loss `L(V, X_i)`
|
# Per example loss `L(V, X_i)`
|
||||||
L <- y2 - y1^2
|
L <- y2 - y1^2
|
||||||
|
if (loss.out | loss.log | loss.only) {
|
||||||
|
meanL <- mean(L)
|
||||||
|
if (loss.out) {
|
||||||
|
# Bubble environments up and write to loss variable, aka out param.
|
||||||
|
loss <<- meanL
|
||||||
|
}
|
||||||
|
if (loss.log) {
|
||||||
|
loss.history[epoch, attempt] <<- meanL
|
||||||
|
}
|
||||||
if (loss.only) {
|
if (loss.only) {
|
||||||
# Mean for total loss `L(V)`.
|
# Mean for total loss `L(V)`.
|
||||||
return(mean(L))
|
return(meanL)
|
||||||
} else if (loss.out) {
|
}
|
||||||
# Bubble environments up and write to loss variable, aka out param.
|
|
||||||
loss <<- mean(L)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Vectorized Weights with forced symmetry
|
# Vectorized Weights with forced symmetry
|
||||||
|
|
|
@ -4,7 +4,8 @@
|
||||||
\alias{grad}
|
\alias{grad}
|
||||||
\title{Compute get gradient of `L(V)` given a dataset `X`.}
|
\title{Compute get gradient of `L(V)` given a dataset `X`.}
|
||||||
\usage{
|
\usage{
|
||||||
grad(X, Y, V, h, loss.only = FALSE, loss.out = FALSE)
|
grad(X, Y, V, h, loss.out = FALSE, loss.log = FALSE,
|
||||||
|
loss.only = FALSE)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{X}{Data matrix.}
|
\item{X}{Data matrix.}
|
||||||
|
|
|
@ -0,0 +1,113 @@
|
||||||
|
## Build and install.
|
||||||
|
To build the package the `devtools` package is used. This also provides `roxygen2` which is used for documentation and authomatic creaton of the `NAMESPACE` file.
|
||||||
|
```R
|
||||||
|
setwd("./CVE_R") # Set path to the package root.
|
||||||
|
library(devtools) # Load required `devtools` package.
|
||||||
|
document() # Create `.Rd` files and write `NAMESPACE`.
|
||||||
|
```
|
||||||
|
Next the package needs to be build, therefore (if pure `R` package, aka. `C/C++`, `Fortran`, ... code) just do the following.
|
||||||
|
```bash
|
||||||
|
R CMD build CVE_R
|
||||||
|
R CMD INSTALL CVE_0.1.tar.gz
|
||||||
|
```
|
||||||
|
Then we are ready for using the package.
|
||||||
|
```R
|
||||||
|
library(CVE)
|
||||||
|
help(package = "CVE")
|
||||||
|
```
|
||||||
|
## Build and install from within `R`.
|
||||||
|
An alternative approach is the following.
|
||||||
|
```R
|
||||||
|
setwd('./CVE_R')
|
||||||
|
getwd()
|
||||||
|
|
||||||
|
library(devtools)
|
||||||
|
document()
|
||||||
|
# No vignettes to build but "inst/doc/" is required!
|
||||||
|
(path <- build(vignettes = FALSE))
|
||||||
|
install.packages(path, repos = NULL, type = "source")
|
||||||
|
```
|
||||||
|
**Note: I only recommend this approach during development.**
|
||||||
|
|
||||||
|
## Reading log files.
|
||||||
|
The runtime tests (upcomming further tests) are creating log files saved in `tmp/`. These log files are `CSV` files (actualy `TSV`) with a header storing the test results. Depending on the test the files may contain differnt data. As an example we use the runtime test logs which store in each line the `dataset`, the used `method` as well as the `error` (actual error of estimated `B` against real `B`) and the `time`. For reading and analysing the data see the following example.
|
||||||
|
```R
|
||||||
|
# Load log as `data.frame`
|
||||||
|
test0 <- read.csv('tmp/test0.log', sep = '\t')
|
||||||
|
# Create a error boxplot grouped by dataset.
|
||||||
|
boxplot(error ~ dataset, test0)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Environments and variable lookup.
|
||||||
|
In the following a view simple examples of how `R` searches for variables.
|
||||||
|
In addition we manipulate funciton closures to alter the search path in variable lookup and outer scope variable manipulation.
|
||||||
|
```R
|
||||||
|
droids <- "These aren't the droids you're looking for."
|
||||||
|
|
||||||
|
search <- function() {
|
||||||
|
print(droids)
|
||||||
|
}
|
||||||
|
|
||||||
|
trooper.seeks <- function() {
|
||||||
|
droids <- c("R2-D2", "C-3PO")
|
||||||
|
search()
|
||||||
|
}
|
||||||
|
|
||||||
|
jedi.seeks <- function() {
|
||||||
|
droids <- c("R2-D2", "C-3PO")
|
||||||
|
environment(search) <- environment()
|
||||||
|
search()
|
||||||
|
}
|
||||||
|
|
||||||
|
trooper.seeks()
|
||||||
|
jedi.seeks()
|
||||||
|
```
|
||||||
|
|
||||||
|
The next example ilustrates how to write (without local copies) to variables outside the functions local environment.
|
||||||
|
```R
|
||||||
|
counting <- function() {
|
||||||
|
count <<- count + 1 # Note the `<<-` assignment.
|
||||||
|
}
|
||||||
|
|
||||||
|
(function() {
|
||||||
|
environment(counting) <- environment()
|
||||||
|
count <- 0
|
||||||
|
|
||||||
|
for (i in 1:10) {
|
||||||
|
counting()
|
||||||
|
}
|
||||||
|
|
||||||
|
return(count)
|
||||||
|
})()
|
||||||
|
|
||||||
|
(function () {
|
||||||
|
closure <- new.env()
|
||||||
|
environment(counting) <- closure
|
||||||
|
assign("count", 0, envir = closure)
|
||||||
|
|
||||||
|
for (i in 1:10) {
|
||||||
|
counting()
|
||||||
|
}
|
||||||
|
|
||||||
|
return(closure$count)
|
||||||
|
})()
|
||||||
|
```
|
||||||
|
|
||||||
|
Another example for the usage of `do.call` where the evaluation of parameters is illustated (example taken (and altered) from `?do.call`).
|
||||||
|
```R
|
||||||
|
## examples of where objects will be found.
|
||||||
|
A <- "A.Global"
|
||||||
|
f <- function(x) print(paste("f.new", x))
|
||||||
|
env <- new.env()
|
||||||
|
assign("A", "A.new", envir = env)
|
||||||
|
assign("f", f, envir = env)
|
||||||
|
f <- function(x) print(paste("f.Global", x))
|
||||||
|
f(A) # f.Global A.Global
|
||||||
|
do.call("f", list(A)) # f.Global A.Global
|
||||||
|
do.call("f", list(A), envir = env) # f.new A.Global
|
||||||
|
do.call(f, list(A), envir = env) # f.Global A.Global
|
||||||
|
do.call("f", list(quote(A)), envir = env) # f.new A.new
|
||||||
|
do.call(f, list(quote(A)), envir = env) # f.Global A.new
|
||||||
|
do.call("f", list(as.name("A")), envir = env) # f.new A.new
|
||||||
|
do.call("f", list(as.name("A")), envir = env) # f.new A.new
|
||||||
|
```
|
Loading…
Reference in New Issue