2
0
Fork 0

fix: cve_simple runs correct,

add: notes
This commit is contained in:
Daniel Kapla 2019-09-02 15:22:35 +02:00
parent 7155d0e9db
commit 1c120ec67c
5 changed files with 191 additions and 90 deletions

View File

@ -37,7 +37,8 @@ cve_sgd <- function(X, Y, k,
# Reset learning rate `tau`. # Reset learning rate `tau`.
tau <- tau.init tau <- tau.init
# Sample a starting basis from the Stiefl manifold. # Sample a `(p, q)` dimensional matrix from the stiefel manifold as
# optimization start value.
V <- rStiefl(p, q) V <- rStiefl(p, q)
# Repeat `epochs` times # Repeat `epochs` times

View File

@ -12,35 +12,36 @@ cve_simple <- function(X, Y, k,
epochs = 50L, epochs = 50L,
attempts = 10L attempts = 10L
) { ) {
# Addapt tolearance for break condition # Set `grad` functions environment to enable if to find this environments
tol <- sqrt(2 * k) * tol # local variabels, needed to enable the manipulation of this local variables
tau.init <- tau # remember to reset for new attempt # from within `grad`.
environment(grad) <- environment()
# Setup loss histroy.
loss.history <- matrix(NA, epochs, attempts);
# Get dimensions. # Get dimensions.
n <- nrow(X) n <- nrow(X)
p <- ncol(X) p <- ncol(X)
q <- p - k q <- p - k
# Save initial learning rate `tau`.
tau.init <- tau
# Addapt tolearance for break condition.
tol <- sqrt(2 * q) * tol
# Estaimate bandwidth if not given. # Estaimate bandwidth if not given.
if (missing(h) | !is.numeric(h)) { if (missing(h) | !is.numeric(h)) {
h <- estimate.bandwidth(X, k, nObs) h <- estimate.bandwidth(X, k, nObs)
} }
# Compue all static data.
X_diff <- row.pair.apply(X, `-`)
index <- matrix(seq(n * n), n, n)
tri.i <- row.pair.apply(index[, 1, drop = FALSE], function(i, j) { i })
tri.j <- row.pair.apply(index[, 1, drop = FALSE], function(i, j) { j })
lower.tri.ind <- index[lower.tri(index)]
upper.tri.ind <- t(index)[lower.tri.ind] # ATTENTION: corret order
I_p <- diag(1, p) I_p <- diag(1, p)
# Init variables for best attempt # Init tracking of current best (according multiple attempts).
loss.best <- Inf
V.best <- NULL V.best <- NULL
loss.best <- Inf
# Take a view attempts with different starting values # Start loop for multiple attempts.
for (attempt in 1:attempts) { for (attempt in 1:attempts) {
# reset step width `tau` # reset step width `tau`
@ -50,85 +51,63 @@ cve_simple <- function(X, Y, k,
# optimization start value. # optimization start value.
V <- rStiefl(p, q) V <- rStiefl(p, q)
## Initial loss calculation # Initial loss and gradient.
# Orthogonal projection to `span(V)`. loss <- Inf
Q <- I_p - (V %*% t(V)) G <- grad(X, Y, V, h, loss.out = TRUE) # `loss.out=T` sets `loss`!
# Set last loss (aka, loss after applying the step).
# Compute vectorized distance matrix `D`. loss.last <- loss
vecD <- rowSums((X_diff %*% Q)^2)
# Compute weights matrix `W`
W <- matrix(1, n, n) # init (`exp(0) = 1` in the diagonal)
W[lower.tri.ind] <- exp(vecD / (-2 * h)) # set lower triangular part
W[upper.tri.ind] <- t(W)[upper.tri.ind] # mirror to upper triangular part
W <- sweep(W, 2, colSums(W), FUN = `/`) # normalize columns
# Weighted `Y` momentums
y1 <- Y %*% W # is 1D anyway, avoid transposing `W`
y2 <- Y^2 %*% W
# Get per sample loss `L(V, X_i)`
L <- y2 - y1^2
# Sum to tolal loss `L(V)`
loss <- mean(L)
## Start optimization loop.
for (iter in 1:epochs) {
# index according a lower triangular matrix stored in column major order
# by only the `i` or `j` index.
# vecW <- lower.tri.vector(W) + upper.tri.vector(W)
vecW <- W[lower.tri.ind] + W[upper.tri.ind]
S <- (L[tri.j] - (Y[tri.i] - y1[tri.j])^2) * vecW * vecD
# Gradient
G <- t(X_diff) %*% sweep(X_diff %*% V, 1, S, `*`);
G <- (-2 / (n * h^2)) * G
# Cayley transform matrix `A` # Cayley transform matrix `A`
A <- (G %*% t(V)) - (V %*% t(G)) A <- (G %*% t(V)) - (V %*% t(G))
# Compute next `V` by step size `tau` unsing the Cayley transform ## Start optimization loop.
# via a parallel transport into the gradient direction. for (epoch in 1:epochs) {
# Apply learning rate `tau`.
A.tau <- tau * A A.tau <- tau * A
# Parallet transport (on Stiefl manifold) into direction of `G`.
V.tau <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V) V.tau <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)
# Orthogonal projection to `span(V.tau)`. # Loss at position after a step.
Q <- I_p - (V.tau %*% t(V.tau)) loss <- grad(X, Y, V.tau, h, loss.only = TRUE)
# Compute vectorized distance matrix `D`.
vecD <- rowSums((X_diff %*% Q)^2)
# Compute weights matrix `W` (only update values, diag keeps 1's)
W[lower.tri.ind] <- exp(vecD / (-2 * h)) # set lower triangular part
W[upper.tri.ind] <- t(W)[upper.tri.ind] # mirror to upper triangular part
W <- sweep(W, 2, colSums(W), FUN = `/`) # normalize columns
# Weighted `Y` momentums
y1 <- Y %*% W # is 1D anyway, avoid transposing `W`
y2 <- Y^2 %*% W
# Get per sample loss `L(V, X_i)`
L <- y2 - y1^2
# Sum to tolal loss `L(V)`
loss.tau <- mean(L)
# Check if step is appropriate # Check if step is appropriate
if (loss != Inf & loss.tau - loss > slack * loss) { if ((loss - loss.last) > slack * loss.last) {
tau <- tau / 2 tau <- tau / 2
} else { next() # Keep position and try with smaller `tau`.
loss <- loss.tau
V <- V.tau
} }
# Compute error.
error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
# Check break condition (epoch check to skip ignored gradient calc).
# Note: the devision by `sqrt(2 * k)` is included in `tol`.
if (error < tol | epoch >= epochs) {
# take last step and stop optimization.
V <- V.tau
break()
}
# Perform the step and remember previous loss.
V <- V.tau
loss.last <- loss
# Compute gradient at new position.
# Note: `loss` will be updated too!
G <- grad(X, Y, V, h, loss.out = TRUE, loss.log = TRUE)
# Cayley transform matrix `A`
A <- (G %*% t(V)) - (V %*% t(G))
} }
# Check if current attempt improved previous ones # Check if current attempt improved previous ones
if (loss.tau < loss.best) { if (loss < loss.best) {
loss.best <- loss.tau loss.best <- loss
V.best <- V.tau V.best <- V
} }
} }
return(list( return(list(
loss.history = loss.history,
loss = loss.best, loss = loss.best,
V = V.best, V = V.best,
B = null(V.best), B = null(V.best),

View File

@ -8,7 +8,7 @@
#' value loss is returned and \code{envir} is ignored. #' value loss is returned and \code{envir} is ignored.
#' @keywords internal #' @keywords internal
#' @export #' @export
grad <- function(X, Y, V, h, loss.only = FALSE, loss.out = FALSE) { grad <- function(X, Y, V, h, loss.out = FALSE, loss.log = FALSE, loss.only = FALSE) {
# Get number of samples and dimension. # Get number of samples and dimension.
n <- nrow(X) n <- nrow(X)
p <- ncol(X) p <- ncol(X)
@ -23,12 +23,12 @@ grad <- function(X, Y, V, h, loss.only = FALSE, loss.out = FALSE) {
lower <- index[lower.tri(index)] lower <- index[lower.tri(index)]
upper <- t(index)[lower] upper <- t(index)[lower]
# Create all pairewise differences of rows of `X`.
X_diff <- X[i, , drop = F] - X[j, , drop = F]
# Projection matrix onto `span(V)` # Projection matrix onto `span(V)`
Q <- diag(1, p) - (V %*% t(V)) Q <- diag(1, p) - (V %*% t(V))
# Create all pairewise differences of rows of `X`.
X_diff <- X[i, , drop=F] - X[j, , drop=F]
# Vectorized distance matrix `D`. # Vectorized distance matrix `D`.
vecD <- rowSums((X_diff %*% Q)^2) vecD <- rowSums((X_diff %*% Q)^2)
@ -36,7 +36,7 @@ grad <- function(X, Y, V, h, loss.only = FALSE, loss.out = FALSE) {
W <- matrix(dnorm(0), n, n) W <- matrix(dnorm(0), n, n)
W[lower] <- dnorm(vecD / h) # Set lower tri. part W[lower] <- dnorm(vecD / h) # Set lower tri. part
W[upper] <- t(W)[upper] # Mirror lower tri. to upper W[upper] <- t(W)[upper] # Mirror lower tri. to upper
W <- sweep(W, 2, colSums(W), FUN=`/`) # Col-Normalize W <- sweep(W, 2, colSums(W), FUN = `/`) # Col-Normalize
# Weighted `Y` momentums # Weighted `Y` momentums
y1 <- Y %*% W # Result is 1D -> transposition irrelevant y1 <- Y %*% W # Result is 1D -> transposition irrelevant
@ -44,12 +44,19 @@ grad <- function(X, Y, V, h, loss.only = FALSE, loss.out = FALSE) {
# Per example loss `L(V, X_i)` # Per example loss `L(V, X_i)`
L <- y2 - y1^2 L <- y2 - y1^2
if (loss.out | loss.log | loss.only) {
meanL <- mean(L)
if (loss.out) {
# Bubble environments up and write to loss variable, aka out param.
loss <<- meanL
}
if (loss.log) {
loss.history[epoch, attempt] <<- meanL
}
if (loss.only) { if (loss.only) {
# Mean for total loss `L(V)`. # Mean for total loss `L(V)`.
return(mean(L)) return(meanL)
} else if (loss.out) { }
# Bubble environments up and write to loss variable, aka out param.
loss <<- mean(L)
} }
# Vectorized Weights with forced symmetry # Vectorized Weights with forced symmetry

View File

@ -4,7 +4,8 @@
\alias{grad} \alias{grad}
\title{Compute get gradient of `L(V)` given a dataset `X`.} \title{Compute get gradient of `L(V)` given a dataset `X`.}
\usage{ \usage{
grad(X, Y, V, h, loss.only = FALSE, loss.out = FALSE) grad(X, Y, V, h, loss.out = FALSE, loss.log = FALSE,
loss.only = FALSE)
} }
\arguments{ \arguments{
\item{X}{Data matrix.} \item{X}{Data matrix.}

113
notes.md Normal file
View File

@ -0,0 +1,113 @@
## Build and install.
To build the package the `devtools` package is used. This also provides `roxygen2` which is used for documentation and authomatic creaton of the `NAMESPACE` file.
```R
setwd("./CVE_R") # Set path to the package root.
library(devtools) # Load required `devtools` package.
document() # Create `.Rd` files and write `NAMESPACE`.
```
Next the package needs to be build, therefore (if pure `R` package, aka. `C/C++`, `Fortran`, ... code) just do the following.
```bash
R CMD build CVE_R
R CMD INSTALL CVE_0.1.tar.gz
```
Then we are ready for using the package.
```R
library(CVE)
help(package = "CVE")
```
## Build and install from within `R`.
An alternative approach is the following.
```R
setwd('./CVE_R')
getwd()
library(devtools)
document()
# No vignettes to build but "inst/doc/" is required!
(path <- build(vignettes = FALSE))
install.packages(path, repos = NULL, type = "source")
```
**Note: I only recommend this approach during development.**
## Reading log files.
The runtime tests (upcomming further tests) are creating log files saved in `tmp/`. These log files are `CSV` files (actualy `TSV`) with a header storing the test results. Depending on the test the files may contain differnt data. As an example we use the runtime test logs which store in each line the `dataset`, the used `method` as well as the `error` (actual error of estimated `B` against real `B`) and the `time`. For reading and analysing the data see the following example.
```R
# Load log as `data.frame`
test0 <- read.csv('tmp/test0.log', sep = '\t')
# Create a error boxplot grouped by dataset.
boxplot(error ~ dataset, test0)
```
## Environments and variable lookup.
In the following a view simple examples of how `R` searches for variables.
In addition we manipulate funciton closures to alter the search path in variable lookup and outer scope variable manipulation.
```R
droids <- "These aren't the droids you're looking for."
search <- function() {
print(droids)
}
trooper.seeks <- function() {
droids <- c("R2-D2", "C-3PO")
search()
}
jedi.seeks <- function() {
droids <- c("R2-D2", "C-3PO")
environment(search) <- environment()
search()
}
trooper.seeks()
jedi.seeks()
```
The next example ilustrates how to write (without local copies) to variables outside the functions local environment.
```R
counting <- function() {
count <<- count + 1 # Note the `<<-` assignment.
}
(function() {
environment(counting) <- environment()
count <- 0
for (i in 1:10) {
counting()
}
return(count)
})()
(function () {
closure <- new.env()
environment(counting) <- closure
assign("count", 0, envir = closure)
for (i in 1:10) {
counting()
}
return(closure$count)
})()
```
Another example for the usage of `do.call` where the evaluation of parameters is illustated (example taken (and altered) from `?do.call`).
```R
## examples of where objects will be found.
A <- "A.Global"
f <- function(x) print(paste("f.new", x))
env <- new.env()
assign("A", "A.new", envir = env)
assign("f", f, envir = env)
f <- function(x) print(paste("f.Global", x))
f(A) # f.Global A.Global
do.call("f", list(A)) # f.Global A.Global
do.call("f", list(A), envir = env) # f.new A.Global
do.call(f, list(A), envir = env) # f.Global A.Global
do.call("f", list(quote(A)), envir = env) # f.new A.new
do.call(f, list(quote(A)), envir = env) # f.Global A.new
do.call("f", list(as.name("A")), envir = env) # f.new A.new
do.call("f", list(as.name("A")), envir = env) # f.new A.new
```