fix: cve_simple runs correct,

add: notes
2019-09-02 15:22:35 +02:00 · 2019-09-02 15:22:35 +02:00 · 1c120ec67c
parent 7155d0e9db
commit 1c120ec67c
5 changed files with 191 additions and 90 deletions
--- a/CVE_R/R/cve_sgd.R
+++ b/CVE_R/R/cve_sgd.R
@ -37,7 +37,8 @@ cve_sgd <- function(X, Y, k,
        # Reset learning rate `tau`.
        tau <- tau.init

-        # Sample a starting basis from the Stiefl manifold.
+        # Sample a `(p, q)` dimensional matrix from the stiefel manifold as
+        # optimization start value.
        V <- rStiefl(p, q)

        # Repeat `epochs` times
--- a/CVE_R/R/cve_simple.R
+++ b/CVE_R/R/cve_simple.R
@ -12,35 +12,36 @@ cve_simple <- function(X, Y, k,
                       epochs = 50L,
                       attempts = 10L
 ) {
-    # Addapt tolearance for break condition
-    tol <- sqrt(2 * k) * tol
-    tau.init <- tau # remember to reset for new attempt
+    # Set `grad` functions environment to enable if to find this environments
+    # local variabels, needed to enable the manipulation of this local variables
+    # from within `grad`.
+    environment(grad) <- environment()
+
+    # Setup loss histroy.
+    loss.history <- matrix(NA, epochs, attempts);

    # Get dimensions.
    n <- nrow(X)
    p <- ncol(X)
    q <- p - k

+    # Save initial learning rate `tau`.
+    tau.init <- tau
+    # Addapt tolearance for break condition.
+    tol <- sqrt(2 * q) * tol
+
    # Estaimate bandwidth if not given.
    if (missing(h) | !is.numeric(h)) {
        h <- estimate.bandwidth(X, k, nObs)
    }

-    # Compue all static data.
-    X_diff <- row.pair.apply(X, `-`)
-    index <- matrix(seq(n * n), n, n)
-    tri.i <- row.pair.apply(index[, 1, drop = FALSE], function(i, j) { i })
-    tri.j <- row.pair.apply(index[, 1, drop = FALSE], function(i, j) { j })
-    lower.tri.ind <- index[lower.tri(index)]
-    upper.tri.ind <- t(index)[lower.tri.ind] # ATTENTION: corret order
-
    I_p <- diag(1, p)

-    # Init variables for best attempt
-    loss.best <- Inf
+    # Init tracking of current best (according multiple attempts).
    V.best <- NULL
+    loss.best <- Inf

-    # Take a view attempts with different starting values
+    # Start loop for multiple attempts.
    for (attempt in 1:attempts) {

        # reset step width `tau`
@ -50,85 +51,63 @@ cve_simple <- function(X, Y, k,
        # optimization start value.
        V <- rStiefl(p, q)

-        ## Initial loss calculation
-        # Orthogonal projection to `span(V)`.
-        Q <- I_p - (V %*% t(V))
-
-        # Compute vectorized distance matrix `D`.
-        vecD <- rowSums((X_diff %*% Q)^2)
-        # Compute weights matrix `W`
-        W <- matrix(1, n, n) # init (`exp(0) = 1` in the diagonal)
-        W[lower.tri.ind] <- exp(vecD / (-2 * h)) # set lower triangular part
-        W[upper.tri.ind] <- t(W)[upper.tri.ind] # mirror to upper triangular part
-        W <- sweep(W, 2, colSums(W), FUN = `/`) # normalize columns
-
-        # Weighted `Y` momentums
-        y1 <- Y %*% W # is 1D anyway, avoid transposing `W`
-        y2 <- Y^2 %*% W
-
-        # Get per sample loss `L(V, X_i)`
-        L <- y2 - y1^2
-        # Sum to tolal loss `L(V)`
-        loss <- mean(L)
-
-        ## Start optimization loop.
-        for (iter in 1:epochs) {
-
-            # index according a lower triangular matrix stored in column major order
-            # by only the `i` or `j` index.
-            # vecW <- lower.tri.vector(W) + upper.tri.vector(W)
-            vecW <- W[lower.tri.ind] + W[upper.tri.ind]
-            S <- (L[tri.j] - (Y[tri.i] - y1[tri.j])^2) * vecW * vecD
-
-            # Gradient
-            G <- t(X_diff) %*% sweep(X_diff %*% V, 1, S, `*`);
-            G <- (-2 / (n * h^2)) * G
+        # Initial loss and gradient.
+        loss <- Inf
+        G <- grad(X, Y, V, h, loss.out = TRUE) # `loss.out=T` sets `loss`!
+        # Set last loss (aka, loss after applying the step).
+        loss.last <- loss

        # Cayley transform matrix `A`
        A <- (G %*% t(V)) - (V %*% t(G))

-            # Compute next `V` by step size `tau` unsing the Cayley transform
-            # via a parallel transport into the gradient direction.
+        ## Start optimization loop.
+        for (epoch in 1:epochs) {
+            # Apply learning rate `tau`.
            A.tau <- tau * A
+            # Parallet transport (on Stiefl manifold) into direction of `G`.
            V.tau <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)

-            # Orthogonal projection to `span(V.tau)`.
-            Q <- I_p - (V.tau %*% t(V.tau))
-
-            # Compute vectorized distance matrix `D`.
-            vecD <- rowSums((X_diff %*% Q)^2)
-            # Compute weights matrix `W` (only update values, diag keeps 1's)
-            W[lower.tri.ind] <- exp(vecD / (-2 * h)) # set lower triangular part
-            W[upper.tri.ind] <- t(W)[upper.tri.ind] # mirror to upper triangular part
-            W <- sweep(W, 2, colSums(W), FUN = `/`) # normalize columns
-
-            # Weighted `Y` momentums
-            y1 <- Y %*% W # is 1D anyway, avoid transposing `W`
-            y2 <- Y^2 %*% W
-
-            # Get per sample loss `L(V, X_i)`
-            L <- y2 - y1^2
-            # Sum to tolal loss `L(V)`
-            loss.tau <- mean(L)
+            # Loss at position after a step.
+            loss <- grad(X, Y, V.tau, h, loss.only = TRUE)

            # Check if step is appropriate
-            if (loss != Inf & loss.tau - loss > slack * loss) {
+            if ((loss - loss.last) > slack * loss.last) {
                tau <- tau / 2
-            } else {
-                loss <- loss.tau
-                V <- V.tau
+                next() # Keep position and try with smaller `tau`.
            }
+
+            # Compute error.
+            error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
+            # Check break condition (epoch check to skip ignored gradient calc).
+            # Note: the devision by `sqrt(2 * k)` is included in `tol`.
+            if (error < tol | epoch >= epochs) {
+                # take last step and stop optimization.
+                V <- V.tau
+                break()
+            }
+
+            # Perform the step and remember previous loss.
+            V <- V.tau
+            loss.last <- loss
+
+            # Compute gradient at new position.
+            # Note: `loss` will be updated too!
+            G <- grad(X, Y, V, h, loss.out = TRUE, loss.log = TRUE)
+
+            # Cayley transform matrix `A`
+            A <- (G %*% t(V)) - (V %*% t(G))
        }

        # Check if current attempt improved previous ones
-        if (loss.tau < loss.best) {
-            loss.best <- loss.tau
-            V.best <- V.tau
+        if (loss < loss.best) {
+            loss.best <- loss
+            V.best <- V
        }

    }

    return(list(
+        loss.history = loss.history,
        loss = loss.best,
        V = V.best,
        B = null(V.best),
--- a/CVE_R/R/gradient.R
+++ b/CVE_R/R/gradient.R
@ -8,7 +8,7 @@
 #'  value loss is returned and \code{envir} is ignored.
 #' @keywords internal
 #' @export
-grad <- function(X, Y, V, h, loss.only = FALSE, loss.out = FALSE) {
+grad <- function(X, Y, V, h, loss.out = FALSE, loss.log = FALSE, loss.only = FALSE) {
    # Get number of samples and dimension.
    n <- nrow(X)
    p <- ncol(X)
@ -23,12 +23,12 @@ grad <- function(X, Y, V, h, loss.only = FALSE, loss.out = FALSE) {
    lower <- index[lower.tri(index)]
    upper <- t(index)[lower]

-    # Projection matrix onto `span(V)`
-    Q <- diag(1, p) - (V %*% t(V))
-
    # Create all pairewise differences of rows of `X`.
    X_diff <- X[i, , drop = F] - X[j, , drop = F]

+    # Projection matrix onto `span(V)`
+    Q <- diag(1, p) - (V %*% t(V))
+
    # Vectorized distance matrix `D`.
    vecD <- rowSums((X_diff %*% Q)^2)

@ -44,12 +44,19 @@ grad <- function(X, Y, V, h, loss.only = FALSE, loss.out = FALSE) {

    # Per example loss `L(V, X_i)`
    L <- y2 - y1^2
+    if (loss.out | loss.log | loss.only) {
+        meanL <- mean(L)
+        if (loss.out) {
+            # Bubble environments up and write to loss variable, aka out param.
+            loss <<- meanL
+        }
+        if (loss.log) {
+            loss.history[epoch, attempt] <<- meanL
+        }
        if (loss.only) {
            # Mean for total loss `L(V)`.
-        return(mean(L))
-    } else if (loss.out) {
-        # Bubble environments up and write to loss variable, aka out param.
-        loss <<- mean(L)
+            return(meanL)
+        }
    }

    # Vectorized Weights with forced symmetry
--- a/CVE_R/man/grad.Rd
+++ b/CVE_R/man/grad.Rd
@ -4,7 +4,8 @@
 \alias{grad}
 \title{Compute get gradient of `L(V)` given a dataset `X`.}
 \usage{
-grad(X, Y, V, h, loss.only = FALSE, loss.out = FALSE)
+grad(X, Y, V, h, loss.out = FALSE, loss.log = FALSE,
+  loss.only = FALSE)
 }
 \arguments{
 \item{X}{Data matrix.}
--- a/notes.md
+++ b/notes.md
@ -0,0 +1,113 @@
+## Build and install.
+To build the package the `devtools` package is used. This also provides `roxygen2` which is used for documentation and authomatic creaton of the `NAMESPACE` file.
+```R
+setwd("./CVE_R")  # Set path to the package root.
+library(devtools) # Load required `devtools` package.
+document()        # Create `.Rd` files and write `NAMESPACE`.
+```
+Next the package needs to be build, therefore (if pure `R` package, aka. `C/C++`, `Fortran`, ... code) just do the following.
+```bash
+R CMD build CVE_R
+R CMD INSTALL CVE_0.1.tar.gz
+```
+Then we are ready for using the package.
+```R
+library(CVE)
+help(package = "CVE")
+```
+## Build and install from within `R`.
+An alternative approach is the following.
+```R
+setwd('./CVE_R')
+getwd()
+
+library(devtools)
+document()
+# No vignettes to build but "inst/doc/" is required!
+(path <- build(vignettes = FALSE))
+install.packages(path, repos = NULL, type = "source")
+```
+**Note: I only recommend this approach during development.**
+
+## Reading log files.
+The runtime tests (upcomming further tests) are creating log files saved in `tmp/`. These log files are `CSV` files (actualy `TSV`) with a header storing the test results. Depending on the test the files may contain differnt data. As an example we use the runtime test logs which store in each line the `dataset`, the used `method` as well as the `error` (actual error of estimated `B` against real `B`) and the `time`. For reading and analysing the data see the following example.
+```R
+# Load log as `data.frame`
+test0 <- read.csv('tmp/test0.log', sep = '\t')
+# Create a error boxplot grouped by dataset.
+boxplot(error ~ dataset, test0)
+```
+
+## Environments and variable lookup.
+In the following a view simple examples of how `R` searches for variables.
+In addition we manipulate funciton closures to alter the search path in variable lookup and outer scope variable manipulation.
+```R
+droids <- "These aren't the droids you're looking for."
+
+search <- function() {
+    print(droids)
+}
+
+trooper.seeks <- function() {
+    droids <- c("R2-D2", "C-3PO")
+    search()
+}
+
+jedi.seeks <- function() {
+    droids <- c("R2-D2", "C-3PO")
+    environment(search) <- environment()
+    search()
+}
+
+trooper.seeks()
+jedi.seeks()
+```
+
+The next example ilustrates how to write (without local copies) to variables outside the functions local environment.
+```R
+counting <- function() {
+    count <<- count + 1 # Note the `<<-` assignment.
+}
+
+(function() {
+    environment(counting) <- environment()
+    count <- 0
+
+    for (i in 1:10) {
+        counting()
+    }
+
+    return(count)
+})()
+
+(function () {
+    closure <- new.env()
+    environment(counting) <- closure
+    assign("count", 0, envir = closure)
+
+    for (i in 1:10) {
+        counting()
+    }
+
+    return(closure$count)
+})()
+```
+
+Another example for the usage of `do.call` where the evaluation of parameters is illustated (example taken (and altered) from `?do.call`).
+```R
+## examples of where objects will be found.
+A <- "A.Global"
+f <- function(x) print(paste("f.new", x))
+env <- new.env()
+assign("A", "A.new", envir = env)
+assign("f", f, envir = env)
+f <- function(x) print(paste("f.Global", x))
+f(A)                                          # f.Global A.Global
+do.call("f", list(A))                         # f.Global A.Global
+do.call("f", list(A), envir = env)            # f.new A.Global
+do.call(f,   list(A), envir = env)            # f.Global A.Global
+do.call("f", list(quote(A)), envir = env)     # f.new A.new
+do.call(f,   list(quote(A)), envir = env)     # f.Global A.new
+do.call("f", list(as.name("A")), envir = env) # f.new A.new
+do.call("f", list(as.name("A")), envir = env) # f.new A.new
+```