diff --git a/CVE_R/R/cve_sgd.R b/CVE_R/R/cve_sgd.R index 23426df..9769a0c 100644 --- a/CVE_R/R/cve_sgd.R +++ b/CVE_R/R/cve_sgd.R @@ -37,7 +37,8 @@ cve_sgd <- function(X, Y, k, # Reset learning rate `tau`. tau <- tau.init - # Sample a starting basis from the Stiefl manifold. + # Sample a `(p, q)` dimensional matrix from the stiefel manifold as + # optimization start value. V <- rStiefl(p, q) # Repeat `epochs` times diff --git a/CVE_R/R/cve_simple.R b/CVE_R/R/cve_simple.R index b4daf18..23a87c8 100644 --- a/CVE_R/R/cve_simple.R +++ b/CVE_R/R/cve_simple.R @@ -12,35 +12,36 @@ cve_simple <- function(X, Y, k, epochs = 50L, attempts = 10L ) { - # Addapt tolearance for break condition - tol <- sqrt(2 * k) * tol - tau.init <- tau # remember to reset for new attempt + # Set `grad` functions environment to enable if to find this environments + # local variabels, needed to enable the manipulation of this local variables + # from within `grad`. + environment(grad) <- environment() + + # Setup loss histroy. + loss.history <- matrix(NA, epochs, attempts); # Get dimensions. n <- nrow(X) p <- ncol(X) q <- p - k + # Save initial learning rate `tau`. + tau.init <- tau + # Addapt tolearance for break condition. + tol <- sqrt(2 * q) * tol + # Estaimate bandwidth if not given. if (missing(h) | !is.numeric(h)) { h <- estimate.bandwidth(X, k, nObs) } - # Compue all static data. - X_diff <- row.pair.apply(X, `-`) - index <- matrix(seq(n * n), n, n) - tri.i <- row.pair.apply(index[, 1, drop = FALSE], function(i, j) { i }) - tri.j <- row.pair.apply(index[, 1, drop = FALSE], function(i, j) { j }) - lower.tri.ind <- index[lower.tri(index)] - upper.tri.ind <- t(index)[lower.tri.ind] # ATTENTION: corret order - I_p <- diag(1, p) - # Init variables for best attempt - loss.best <- Inf + # Init tracking of current best (according multiple attempts). V.best <- NULL + loss.best <- Inf - # Take a view attempts with different starting values + # Start loop for multiple attempts. for (attempt in 1:attempts) { # reset step width `tau` @@ -50,85 +51,63 @@ cve_simple <- function(X, Y, k, # optimization start value. V <- rStiefl(p, q) - ## Initial loss calculation - # Orthogonal projection to `span(V)`. - Q <- I_p - (V %*% t(V)) + # Initial loss and gradient. + loss <- Inf + G <- grad(X, Y, V, h, loss.out = TRUE) # `loss.out=T` sets `loss`! + # Set last loss (aka, loss after applying the step). + loss.last <- loss - # Compute vectorized distance matrix `D`. - vecD <- rowSums((X_diff %*% Q)^2) - # Compute weights matrix `W` - W <- matrix(1, n, n) # init (`exp(0) = 1` in the diagonal) - W[lower.tri.ind] <- exp(vecD / (-2 * h)) # set lower triangular part - W[upper.tri.ind] <- t(W)[upper.tri.ind] # mirror to upper triangular part - W <- sweep(W, 2, colSums(W), FUN = `/`) # normalize columns - - # Weighted `Y` momentums - y1 <- Y %*% W # is 1D anyway, avoid transposing `W` - y2 <- Y^2 %*% W - - # Get per sample loss `L(V, X_i)` - L <- y2 - y1^2 - # Sum to tolal loss `L(V)` - loss <- mean(L) + # Cayley transform matrix `A` + A <- (G %*% t(V)) - (V %*% t(G)) ## Start optimization loop. - for (iter in 1:epochs) { + for (epoch in 1:epochs) { + # Apply learning rate `tau`. + A.tau <- tau * A + # Parallet transport (on Stiefl manifold) into direction of `G`. + V.tau <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V) - # index according a lower triangular matrix stored in column major order - # by only the `i` or `j` index. - # vecW <- lower.tri.vector(W) + upper.tri.vector(W) - vecW <- W[lower.tri.ind] + W[upper.tri.ind] - S <- (L[tri.j] - (Y[tri.i] - y1[tri.j])^2) * vecW * vecD + # Loss at position after a step. + loss <- grad(X, Y, V.tau, h, loss.only = TRUE) - # Gradient - G <- t(X_diff) %*% sweep(X_diff %*% V, 1, S, `*`); - G <- (-2 / (n * h^2)) * G + # Check if step is appropriate + if ((loss - loss.last) > slack * loss.last) { + tau <- tau / 2 + next() # Keep position and try with smaller `tau`. + } + + # Compute error. + error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F") + # Check break condition (epoch check to skip ignored gradient calc). + # Note: the devision by `sqrt(2 * k)` is included in `tol`. + if (error < tol | epoch >= epochs) { + # take last step and stop optimization. + V <- V.tau + break() + } + + # Perform the step and remember previous loss. + V <- V.tau + loss.last <- loss + + # Compute gradient at new position. + # Note: `loss` will be updated too! + G <- grad(X, Y, V, h, loss.out = TRUE, loss.log = TRUE) # Cayley transform matrix `A` A <- (G %*% t(V)) - (V %*% t(G)) - - # Compute next `V` by step size `tau` unsing the Cayley transform - # via a parallel transport into the gradient direction. - A.tau <- tau * A - V.tau <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V) - - # Orthogonal projection to `span(V.tau)`. - Q <- I_p - (V.tau %*% t(V.tau)) - - # Compute vectorized distance matrix `D`. - vecD <- rowSums((X_diff %*% Q)^2) - # Compute weights matrix `W` (only update values, diag keeps 1's) - W[lower.tri.ind] <- exp(vecD / (-2 * h)) # set lower triangular part - W[upper.tri.ind] <- t(W)[upper.tri.ind] # mirror to upper triangular part - W <- sweep(W, 2, colSums(W), FUN = `/`) # normalize columns - - # Weighted `Y` momentums - y1 <- Y %*% W # is 1D anyway, avoid transposing `W` - y2 <- Y^2 %*% W - - # Get per sample loss `L(V, X_i)` - L <- y2 - y1^2 - # Sum to tolal loss `L(V)` - loss.tau <- mean(L) - - # Check if step is appropriate - if (loss != Inf & loss.tau - loss > slack * loss) { - tau <- tau / 2 - } else { - loss <- loss.tau - V <- V.tau - } } # Check if current attempt improved previous ones - if (loss.tau < loss.best) { - loss.best <- loss.tau - V.best <- V.tau + if (loss < loss.best) { + loss.best <- loss + V.best <- V } } return(list( + loss.history = loss.history, loss = loss.best, V = V.best, B = null(V.best), diff --git a/CVE_R/R/gradient.R b/CVE_R/R/gradient.R index d15b693..de8e821 100644 --- a/CVE_R/R/gradient.R +++ b/CVE_R/R/gradient.R @@ -8,7 +8,7 @@ #' value loss is returned and \code{envir} is ignored. #' @keywords internal #' @export -grad <- function(X, Y, V, h, loss.only = FALSE, loss.out = FALSE) { +grad <- function(X, Y, V, h, loss.out = FALSE, loss.log = FALSE, loss.only = FALSE) { # Get number of samples and dimension. n <- nrow(X) p <- ncol(X) @@ -23,12 +23,12 @@ grad <- function(X, Y, V, h, loss.only = FALSE, loss.out = FALSE) { lower <- index[lower.tri(index)] upper <- t(index)[lower] + # Create all pairewise differences of rows of `X`. + X_diff <- X[i, , drop = F] - X[j, , drop = F] + # Projection matrix onto `span(V)` Q <- diag(1, p) - (V %*% t(V)) - # Create all pairewise differences of rows of `X`. - X_diff <- X[i, , drop=F] - X[j, , drop=F] - # Vectorized distance matrix `D`. vecD <- rowSums((X_diff %*% Q)^2) @@ -36,7 +36,7 @@ grad <- function(X, Y, V, h, loss.only = FALSE, loss.out = FALSE) { W <- matrix(dnorm(0), n, n) W[lower] <- dnorm(vecD / h) # Set lower tri. part W[upper] <- t(W)[upper] # Mirror lower tri. to upper - W <- sweep(W, 2, colSums(W), FUN=`/`) # Col-Normalize + W <- sweep(W, 2, colSums(W), FUN = `/`) # Col-Normalize # Weighted `Y` momentums y1 <- Y %*% W # Result is 1D -> transposition irrelevant @@ -44,12 +44,19 @@ grad <- function(X, Y, V, h, loss.only = FALSE, loss.out = FALSE) { # Per example loss `L(V, X_i)` L <- y2 - y1^2 - if (loss.only) { - # Mean for total loss `L(V)`. - return(mean(L)) - } else if (loss.out) { - # Bubble environments up and write to loss variable, aka out param. - loss <<- mean(L) + if (loss.out | loss.log | loss.only) { + meanL <- mean(L) + if (loss.out) { + # Bubble environments up and write to loss variable, aka out param. + loss <<- meanL + } + if (loss.log) { + loss.history[epoch, attempt] <<- meanL + } + if (loss.only) { + # Mean for total loss `L(V)`. + return(meanL) + } } # Vectorized Weights with forced symmetry diff --git a/CVE_R/man/grad.Rd b/CVE_R/man/grad.Rd index e5d7fd9..491d379 100644 --- a/CVE_R/man/grad.Rd +++ b/CVE_R/man/grad.Rd @@ -4,7 +4,8 @@ \alias{grad} \title{Compute get gradient of `L(V)` given a dataset `X`.} \usage{ -grad(X, Y, V, h, loss.only = FALSE, loss.out = FALSE) +grad(X, Y, V, h, loss.out = FALSE, loss.log = FALSE, + loss.only = FALSE) } \arguments{ \item{X}{Data matrix.} diff --git a/notes.md b/notes.md new file mode 100644 index 0000000..102a6e7 --- /dev/null +++ b/notes.md @@ -0,0 +1,113 @@ +## Build and install. +To build the package the `devtools` package is used. This also provides `roxygen2` which is used for documentation and authomatic creaton of the `NAMESPACE` file. +```R +setwd("./CVE_R") # Set path to the package root. +library(devtools) # Load required `devtools` package. +document() # Create `.Rd` files and write `NAMESPACE`. +``` +Next the package needs to be build, therefore (if pure `R` package, aka. `C/C++`, `Fortran`, ... code) just do the following. +```bash +R CMD build CVE_R +R CMD INSTALL CVE_0.1.tar.gz +``` +Then we are ready for using the package. +```R +library(CVE) +help(package = "CVE") +``` +## Build and install from within `R`. +An alternative approach is the following. +```R +setwd('./CVE_R') +getwd() + +library(devtools) +document() +# No vignettes to build but "inst/doc/" is required! +(path <- build(vignettes = FALSE)) +install.packages(path, repos = NULL, type = "source") +``` +**Note: I only recommend this approach during development.** + +## Reading log files. +The runtime tests (upcomming further tests) are creating log files saved in `tmp/`. These log files are `CSV` files (actualy `TSV`) with a header storing the test results. Depending on the test the files may contain differnt data. As an example we use the runtime test logs which store in each line the `dataset`, the used `method` as well as the `error` (actual error of estimated `B` against real `B`) and the `time`. For reading and analysing the data see the following example. +```R +# Load log as `data.frame` +test0 <- read.csv('tmp/test0.log', sep = '\t') +# Create a error boxplot grouped by dataset. +boxplot(error ~ dataset, test0) +``` + +## Environments and variable lookup. +In the following a view simple examples of how `R` searches for variables. +In addition we manipulate funciton closures to alter the search path in variable lookup and outer scope variable manipulation. +```R +droids <- "These aren't the droids you're looking for." + +search <- function() { + print(droids) +} + +trooper.seeks <- function() { + droids <- c("R2-D2", "C-3PO") + search() +} + +jedi.seeks <- function() { + droids <- c("R2-D2", "C-3PO") + environment(search) <- environment() + search() +} + +trooper.seeks() +jedi.seeks() +``` + +The next example ilustrates how to write (without local copies) to variables outside the functions local environment. +```R +counting <- function() { + count <<- count + 1 # Note the `<<-` assignment. +} + +(function() { + environment(counting) <- environment() + count <- 0 + + for (i in 1:10) { + counting() + } + + return(count) +})() + +(function () { + closure <- new.env() + environment(counting) <- closure + assign("count", 0, envir = closure) + + for (i in 1:10) { + counting() + } + + return(closure$count) +})() +``` + +Another example for the usage of `do.call` where the evaluation of parameters is illustated (example taken (and altered) from `?do.call`). +```R +## examples of where objects will be found. +A <- "A.Global" +f <- function(x) print(paste("f.new", x)) +env <- new.env() +assign("A", "A.new", envir = env) +assign("f", f, envir = env) +f <- function(x) print(paste("f.Global", x)) +f(A) # f.Global A.Global +do.call("f", list(A)) # f.Global A.Global +do.call("f", list(A), envir = env) # f.new A.Global +do.call(f, list(A), envir = env) # f.Global A.Global +do.call("f", list(quote(A)), envir = env) # f.new A.new +do.call(f, list(quote(A)), envir = env) # f.Global A.new +do.call("f", list(as.name("A")), envir = env) # f.new A.new +do.call("f", list(as.name("A")), envir = env) # f.new A.new +```