add: cve_linesearch,

fix: cleaned and optimized gradient, add. notes
2019-09-02 21:07:56 +02:00 · 2019-09-02 21:07:56 +02:00 · 7d4d01a9a7
commit 7d4d01a9a7
parent 1c120ec67c
11 changed files with 397 additions and 60 deletions
--- a/CVE_R/NAMESPACE
+++ b/CVE_R/NAMESPACE
@ -6,6 +6,7 @@ export(col.pair.apply)
 export(cve)
 export(cve.call)
 export(cve.grid.search)
+export(cve_linesearch)
 export(cve_sgd)
 export(cve_simple)
 export(dataset)
--- a/CVE_R/R/CVE.R
+++ b/CVE_R/R/CVE.R
@ -94,6 +94,8 @@ cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5, k, ...) {
    method <- tolower(method)
    if (method == 'simple') {
        dr <- cve_simple(X, Y, k, nObs = nObs, ...)
+    } else if (method == 'linesearch') {
+        dr <- cve_linesearch(X, Y, k, nObs = nObs, ...)
    } else if (method == 'sgd') {
        dr <- cve_sgd(X, Y, k, nObs = nObs, ...)
    } else {
--- a/CVE_R/R/cve_linesearch.R
+++ b/CVE_R/R/cve_linesearch.R
@ -0,0 +1,163 @@
+#' Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe
+#' conditions.
+#'
+#' @keywords internal
+#' @export
+cve_linesearch <- function(X, Y, k,
+                           nObs = sqrt(nrow(X)),
+                           h = NULL,
+                           tau = 1.0,
+                           tol = 1e-3,
+                           rho1 = 0.1,
+                           rho2 = 0.9,
+                           slack = 0,
+                           epochs = 50L,
+                           attempts = 10L,
+                           max.linesearch.iter = 10L
+) {
+    # Set `grad` functions environment to enable if to find this environments
+    # local variabels, needed to enable the manipulation of this local variables
+    # from within `grad`.
+    environment(grad) <- environment()
+
+    # Setup histories.
+    loss.history <- matrix(NA, epochs, attempts)
+    error.history <- matrix(NA, epochs, attempts)
+    tau.history <- matrix(NA, epochs, attempts)
+
+    # Get dimensions.
+    n <- nrow(X)
+    p <- ncol(X)
+    q <- p - k
+
+    # Save initial learning rate `tau`.
+    tau.init <- tau
+    # Addapt tolearance for break condition.
+    tol <- sqrt(2 * q) * tol
+
+    # Estaimate bandwidth if not given.
+    if (missing(h) | !is.numeric(h)) {
+        h <- estimate.bandwidth(X, k, nObs)
+    }
+
+    # Compute persistent data.
+    # Compute lookup indexes for symmetrie, lower/upper
+    # triangular parts and vectorization.
+    pair.index <- elem.pairs(seq(n))
+    i <- pair.index[, 1] # `i` indices of `(i, j)` pairs
+    j <- pair.index[, 2] # `j` indices of `(i, j)` pairs
+    # Matrix of vectorized indices. (vec(index) -> seq)
+    index <- matrix(seq(n * n), n, n)
+    lower <- index[lower.tri(index)]
+    upper <- t(index)[lower]
+
+    # Create all pairewise differences of rows of `X`.
+    X_diff <- X[i, , drop = F] - X[j, , drop = F]
+    # Identity matrix.
+    I_p <- diag(1, p)
+
+    # Init tracking of current best (according multiple attempts).
+    V.best <- NULL
+    loss.best <- Inf
+
+    # Start loop for multiple attempts.
+    for (attempt in 1:attempts) {
+
+        # Sample a `(p, q)` dimensional matrix from the stiefel manifold as
+        # optimization start value.
+        V <- rStiefl(p, q)
+
+        # Initial loss and gradient.
+        loss <- Inf
+        G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
+        # Set last loss (aka, loss after applying the step).
+        loss.last <- loss
+
+        ## Start optimization loop.
+        for (epoch in 1:epochs) {
+
+            # Cayley transform matrix `A`
+            A <- (G %*% t(V)) - (V %*% t(G))
+
+            # Directional derivative of the loss at current position, given
+            # as `Tr(G^T \cdot A \cdot V)`.
+            loss.prime <- -0.5 * norm(A, type = 'F')^2
+
+            # Linesearch
+            tau.upper <- Inf
+            tau.lower <- 0
+            tau <- tau.init
+            for (iter in 1:max.linesearch.iter) {
+                # Apply learning rate `tau`.
+                A.tau <- (tau / 2) * A
+                # Parallet transport (on Stiefl manifold) into direction of `G`.
+                inv <- solve(I_p + A.tau)
+                V.tau <- inv %*% ((I_p - A.tau) %*% V)
+
+                # Loss at position after a step.
+                loss <- Inf # aka loss.tau
+                G.tau <- grad(X, Y, V.tau, h, loss.out = TRUE, persistent = TRUE)
+
+                # Armijo condition.
+                if (loss > loss.last + (rho1 * tau * loss.prime)) {
+                    tau.upper <- tau
+                    tau <- (tau.lower + tau.upper) / 2
+                    next()
+                }
+
+                V.prime.tau <- -0.5 * inv %*% A %*% (V + V.tau)
+                loss.prime.tau <- sum(G * V.prime.tau) # Tr(grad(tau)^T \cdot Y^'(tau))
+
+                # Wolfe condition.
+                if (loss.prime.tau < rho2 * loss.prime) {
+                    tau.lower <- tau
+                    if (tau.upper == Inf) {
+                        tau <- 2 * tau.lower
+                    } else {
+                        tau <- (tau.lower + tau.upper) / 2
+                    }
+                } else {
+                    break()
+                }
+            }
+
+            # Compute error.
+            error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
+
+            # Write history.
+            loss.history[epoch, attempt] <- loss
+            error.history[epoch, attempt] <- error
+            tau.history[epoch, attempt] <- tau
+
+            # Check break condition (epoch check to skip ignored gradient calc).
+            # Note: the devision by `sqrt(2 * k)` is included in `tol`.
+            if (error < tol | epoch >= epochs) {
+                # take last step and stop optimization.
+                V <- V.tau
+                break()
+            }
+
+            # Perform the step and remember previous loss.
+            V <- V.tau
+            loss.last <- loss
+            G <- G.tau
+        }
+
+        # Check if current attempt improved previous ones
+        if (loss < loss.best) {
+            loss.best <- loss
+            V.best <- V
+        }
+
+    }
+
+    return(list(
+        loss.history = loss.history,
+        error.history = error.history,
+        tau.history = tau.history,
+        loss = loss.best,
+        V = V.best,
+        B = null(V.best),
+        h = h
+    ))
+}
--- a/CVE_R/R/cve_sgd.R
+++ b/CVE_R/R/cve_sgd.R
@ -7,10 +7,20 @@ cve_sgd <- function(X, Y, k,
                    nObs = sqrt(nrow(X)),
                    h = NULL,
                    tau = 0.01,
+                    tol = 1e-3,
                    epochs = 50L,
                    batch.size = 16L,
                    attempts = 10L
 ) {
+    # Set `grad` functions environment to enable if to find this environments
+    # local variabels, needed to enable the manipulation of this local variables
+    # from within `grad`.
+    environment(grad) <- environment()
+
+    # Setup histories.
+    loss.history <- matrix(NA, epochs, attempts)
+    error.history <- matrix(NA, epochs, attempts)
+
    # Get dimensions.
    n <- nrow(X) # Number of samples.
    p <- ncol(X) # Data dimensions
@ -18,15 +28,31 @@ cve_sgd <- function(X, Y, k,

    # Save initial learning rate `tau`.
    tau.init <- tau
+    # Addapt tolearance for break condition.
+    tol <- sqrt(2 * q) * tol

    # Estaimate bandwidth if not given.
    if (missing(h) | !is.numeric(h)) {
        h <- estimate.bandwidth(X, k, nObs)
    }

+    # Compute persistent data.
+    # Compute lookup indexes for symmetrie, lower/upper
+    # triangular parts and vectorization.
+    pair.index <- elem.pairs(seq(n))
+    i <- pair.index[, 1] # `i` indices of `(i, j)` pairs
+    j <- pair.index[, 2] # `j` indices of `(i, j)` pairs
+    # Matrix of vectorized indices. (vec(index) -> seq)
+    index <- matrix(seq(n * n), n, n)
+    lower <- index[lower.tri(index)]
+    upper <- t(index)[lower]
+
+    # Create all pairewise differences of rows of `X`.
+    X_diff <- X[i, , drop = F] - X[j, , drop = F]
+    # Identity matrix.
+    I_p <- diag(1, p)
    # Init a list of data indices (shuffled for batching).
    indices <- seq(n)
-    I_p <- diag(1, p)

    # Init tracking of current best (according multiple attempts).
    V.best <- NULL
@ -40,6 +66,8 @@ cve_sgd <- function(X, Y, k,
        # Sample a `(p, q)` dimensional matrix from the stiefel manifold as
        # optimization start value.
        V <- rStiefl(p, q)
+        # Keep track of last `V` for computing error after an epoch.
+        V.last <- V

        # Repeat `epochs` times
        for (epoch in 1:epochs) {
@ -55,7 +83,7 @@ cve_sgd <- function(X, Y, k,

                # Compute batch gradient.
                loss <- NULL
-                G <- grad(X[batch, ], Y[batch], V, h)
+                G <- grad(X[batch, ], Y[batch], V, h, loss.out = TRUE)

                # Cayley transform matrix.
                A <- (G %*% t(V)) - (V %*% t(G))
@ -65,27 +93,34 @@ cve_sgd <- function(X, Y, k,
                # Parallet transport (on Stiefl manifold) into direction of `G`.
                V <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)
            }
-        }
            # Compute actuall loss after finishing optimization.
-        loss <- grad(X, Y, V, h, loss.only = TRUE)
-        # After each attempt, check if last attempt reached a better result.
-        if (!is.null(V.best)) { # Only required if there is already a result.
-            if (loss < loss.best) {
-                loss.best <- loss
-                V.best <- V
+            loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
+            # And the error for the history.
+            error <- norm(V.last %*% t(V.last) - V %*% t(V), type = "F")
+            V.last <- V
+
+            # Finaly write history.
+            loss.history[epoch, attempt] <- loss
+            error.history[epoch, attempt] <- error
+
+            # Check break condition.
+            if (error < tol) {
+                break()
            }
-        } else {
+        }
+        # After each attempt, check if last attempt reached a better result.
+        if (loss < loss.best) {
            loss.best <- loss
            V.best <- V
        }
    }

    return(list(
-        X = X, Y = Y, k = k,
-        nObs = nObs, h = h, tau = tau,
-        epochs = epochs, batch = batch, attempts = attempts,
+        loss.history = loss.history,
+        error.history = error.history,
        loss = loss.best,
        V = V.best,
-        B = null(V.best)
+        B = null(V.best),
+        h = h
    ))
 }
--- a/CVE_R/R/cve_simple.R
+++ b/CVE_R/R/cve_simple.R
@ -17,8 +17,9 @@ cve_simple <- function(X, Y, k,
    # from within `grad`.
    environment(grad) <- environment()

-    # Setup loss histroy.
-    loss.history <- matrix(NA, epochs, attempts);
+    # Setup histories.
+    loss.history <- matrix(NA, epochs, attempts)
+    error.history <- matrix(NA, epochs, attempts)

    # Get dimensions.
    n <- nrow(X)
@ -35,6 +36,20 @@ cve_simple <- function(X, Y, k,
        h <- estimate.bandwidth(X, k, nObs)
    }

+    # Compute persistent data.
+    # Compute lookup indexes for symmetrie, lower/upper
+    # triangular parts and vectorization.
+    pair.index <- elem.pairs(seq(n))
+    i <- pair.index[, 1] # `i` indices of `(i, j)` pairs
+    j <- pair.index[, 2] # `j` indices of `(i, j)` pairs
+    # Matrix of vectorized indices. (vec(index) -> seq)
+    index <- matrix(seq(n * n), n, n)
+    lower <- index[lower.tri(index)]
+    upper <- t(index)[lower]
+
+    # Create all pairewise differences of rows of `X`.
+    X_diff <- X[i, , drop = F] - X[j, , drop = F]
+    # Identity matrix.
    I_p <- diag(1, p)

    # Init tracking of current best (according multiple attempts).
@ -53,7 +68,7 @@ cve_simple <- function(X, Y, k,

        # Initial loss and gradient.
        loss <- Inf
-        G <- grad(X, Y, V, h, loss.out = TRUE) # `loss.out=T` sets `loss`!
+        G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
        # Set last loss (aka, loss after applying the step).
        loss.last <- loss

@ -68,7 +83,7 @@ cve_simple <- function(X, Y, k,
            V.tau <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)

            # Loss at position after a step.
-            loss <- grad(X, Y, V.tau, h, loss.only = TRUE)
+            loss <- grad(X, Y, V.tau, h, loss.only = TRUE, persistent = TRUE)

            # Check if step is appropriate
            if ((loss - loss.last) > slack * loss.last) {
@ -78,6 +93,11 @@ cve_simple <- function(X, Y, k,

            # Compute error.
            error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
+
+            # Write history.
+            loss.history[epoch, attempt] <- loss
+            error.history[epoch, attempt] <- error
+
            # Check break condition (epoch check to skip ignored gradient calc).
            # Note: the devision by `sqrt(2 * k)` is included in `tol`.
            if (error < tol | epoch >= epochs) {
@ -91,8 +111,7 @@ cve_simple <- function(X, Y, k,
            loss.last <- loss

            # Compute gradient at new position.
-            # Note: `loss` will be updated too!
-            G <- grad(X, Y, V, h, loss.out = TRUE, loss.log = TRUE)
+            G <- grad(X, Y, V, h, persistent = TRUE)

            # Cayley transform matrix `A`
            A <- (G %*% t(V)) - (V %*% t(G))
@ -108,6 +127,7 @@ cve_simple <- function(X, Y, k,

    return(list(
        loss.history = loss.history,
+        error.history = error.history,
        loss = loss.best,
        V = V.best,
        B = null(V.best),
--- a/CVE_R/R/gradient.R
+++ b/CVE_R/R/gradient.R
@ -4,15 +4,23 @@
 #' @param Y Responce.
 #' @param V Position to compute the gradient at, aka point on Stiefl manifold.
 #' @param h Bandwidth
+#' @param loss.out Iff \code{TRUE} loss will be written to parent environment.
 #' @param loss.only Boolean to only compute the loss, of \code{TRUE} a single
 #'  value loss is returned and \code{envir} is ignored.
+#' @param persistent Determines if data indices and dependent calculations shall
+#'  be reused from the parent environment. ATTENTION: Do NOT set this flag, only
+#'  intended for internal usage by carefully aligned functions!
 #' @keywords internal
 #' @export
-grad <- function(X, Y, V, h, loss.out = FALSE, loss.log = FALSE, loss.only = FALSE) {
+grad <- function(X, Y, V, h,
+                 loss.out = FALSE,
+                 loss.only = FALSE,
+                 persistent = FALSE) {
    # Get number of samples and dimension.
    n <- nrow(X)
    p <- ncol(X)

+    if (!persistent) {
        # Compute lookup indexes for symmetrie, lower/upper
        # triangular parts and vectorization.
        pair.index <- elem.pairs(seq(n))
@ -21,21 +29,22 @@ grad <- function(X, Y, V, h, loss.out = FALSE, loss.log = FALSE, loss.only = FAL
        # Matrix of vectorized indices. (vec(index) -> seq)
        index <- matrix(seq(n * n), n, n)
        lower <- index[lower.tri(index)]
-    upper <- t(index)[lower]
+        upper <- t.default(index)[lower]

        # Create all pairewise differences of rows of `X`.
        X_diff <- X[i, , drop = F] - X[j, , drop = F]
+    }

    # Projection matrix onto `span(V)`
-    Q <- diag(1, p) - (V %*% t(V))
+    Q <- diag(1, p) - tcrossprod(V, V)

    # Vectorized distance matrix `D`.
    vecD <- rowSums((X_diff %*% Q)^2)

    # Weight matrix `W` (dnorm ... gaussean density function)
-    W <- matrix(dnorm(0), n, n)
-    W[lower] <- dnorm(vecD / h) # Set lower tri. part
-    W[upper] <- t(W)[upper] # Mirror lower tri. to upper
+    W <- matrix(1, n, n) # `exp(0) == 1`
+    W[lower] <- exp((-0.5 / h) * vecD^2) # Set lower tri. part
+    W[upper] <- t.default(W)[upper] # Mirror lower tri. to upper
    W <- sweep(W, 2, colSums(W), FUN = `/`) # Col-Normalize

    # Weighted `Y` momentums
@ -44,19 +53,11 @@ grad <- function(X, Y, V, h, loss.out = FALSE, loss.log = FALSE, loss.only = FAL

    # Per example loss `L(V, X_i)`
    L <- y2 - y1^2
-    if (loss.out | loss.log | loss.only) {
-        meanL <- mean(L)
-        if (loss.out) {
-            # Bubble environments up and write to loss variable, aka out param.
-            loss <<- meanL
-        }
-        if (loss.log) {
-            loss.history[epoch, attempt] <<- meanL
-        }
    if (loss.only) {
-            # Mean for total loss `L(V)`.
-            return(meanL)
+        return(mean(L))
    }
+    if (loss.out) {
+        loss <<- mean(L)
    }

    # Vectorized Weights with forced symmetry
@ -66,7 +67,7 @@ grad <- function(X, Y, V, h, loss.out = FALSE, loss.log = FALSE, loss.only = FAL
    vecS <- vecS * vecD

    # The gradient.
-    G <- t(X_diff) %*% sweep(X_diff %*% V, 1, vecS, `*`)
+    G <- crossprod(X_diff, sweep(X_diff %*% V, 1, vecS, `*`))
    G <- (-2 / (n * h^2)) * G
    return(G)
 }
--- a/CVE_R/man/cve_linesearch.Rd
+++ b/CVE_R/man/cve_linesearch.Rd
@ -0,0 +1,16 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/cve_linesearch.R
+\name{cve_linesearch}
+\alias{cve_linesearch}
+\title{Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe
+conditions.}
+\usage{
+cve_linesearch(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 1,
+  tol = 0.001, rho1 = 0.1, rho2 = 0.9, slack = 0, epochs = 50L,
+  attempts = 10L, max.linesearch.iter = 10L)
+}
+\description{
+Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe
+conditions.
+}
+\keyword{internal}
--- a/CVE_R/man/cve_sgd.Rd
+++ b/CVE_R/man/cve_sgd.Rd
@ -6,7 +6,7 @@
 a classic GD method unsing no further tricks.}
 \usage{
 cve_sgd(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 0.01,
-  epochs = 50L, batch.size = 16L, attempts = 10L)
+  tol = 0.001, epochs = 50L, batch.size = 16L, attempts = 10L)
 }
 \description{
 Simple implementation of the CVE method. 'Simple' means that this method is
--- a/CVE_R/man/grad.Rd
+++ b/CVE_R/man/grad.Rd
@ -4,8 +4,8 @@
 \alias{grad}
 \title{Compute get gradient of `L(V)` given a dataset `X`.}
 \usage{
-grad(X, Y, V, h, loss.out = FALSE, loss.log = FALSE,
-  loss.only = FALSE)
+grad(X, Y, V, h, loss.out = FALSE, loss.only = FALSE,
+  persistent = FALSE)
 }
 \arguments{
 \item{X}{Data matrix.}
@ -16,8 +16,14 @@ grad(X, Y, V, h, loss.out = FALSE, loss.log = FALSE,

 \item{h}{Bandwidth}

+\item{loss.out}{Iff \code{TRUE} loss will be written to parent environment.}
+
 \item{loss.only}{Boolean to only compute the loss, of \code{TRUE} a single
 value loss is returned and \code{envir} is ignored.}
+
+\item{persistent}{Determines if data indices and dependent calculations shall
+be reused from the parent environment. ATTENTION: Do NOT set this flag, only
+intended for internal usage by carefully aligned functions!}
 }
 \description{
 Compute get gradient of `L(V)` given a dataset `X`.
--- a/notes.md
+++ b/notes.md
@ -111,3 +111,89 @@ do.call(f,   list(quote(A)), envir = env)     # f.Global A.new
 do.call("f", list(as.name("A")), envir = env) # f.new A.new
 do.call("f", list(as.name("A")), envir = env) # f.new A.new
 ```
+
+# Performance benchmarks
+In this section alternative implementations of simple algorithms are compared for there performance.
+
+### Computing the trace of a matrix multiplication.
+```R
+library(microbenchmark)
+
+A <- matrix(runif(120), 12, 10)
+
+# Matrix trace.
+tr <- function(M) sum(diag(M))
+
+# Check correctnes and benckmark performance.
+stopifnot(
+    all.equal(
+        tr(t(A) %*% A),
+        sum(diag(t(A) %*% A)),
+        sum(A * A)
+    )
+)
+microbenchmark(
+    tr(t(A) %*% A),
+    sum(diag(t(A) %*% A)),
+    sum(A * A)
+)
+# Unit: nanoseconds
+#                   expr  min   lq    mean median     uq   max neval
+#         tr(t(A) %*% A) 4335 4713 5076.36 4949.5 5402.5  7928   100
+#  sum(diag(t(A) %*% A)) 4106 4429 5233.89 4733.5 5057.5 49308   100
+#             sum(A * A)  540  681  777.07  740.0  818.5  3572   100
+```
+
+```R
+n <- 200
+M <- matrix(runif(n^2), n, n)
+
+dnorm2 <- function(x) exp(-0.5 * x^2) / sqrt(2 * pi)
+
+stopifnot(
+    all.equal(dnorm(M), dnorm2(M))
+)
+microbenchmark(
+    dnorm = dnorm(M),
+    dnorm2 = dnorm2(M),
+    exp = exp(-0.5 * M^2) # without scaling -> irrelevant for usage
+)
+# Unit: microseconds
+#   expr     min      lq     mean   median       uq      max neval
+#  dnorm 841.503 843.811 920.7828 855.7505 912.4720 2405.587   100
+# dnorm2 543.510 580.319 629.5321 597.8540 607.3795 2603.763   100
+#    exp 502.083 535.943 577.2884 548.3745 561.3280 2113.220   100
+```
+
+### Using `crosspord()`
+```R
+p <- 12
+q <- 10
+V <- matrix(runif(p * q), p, q)
+
+stopifnot(
+    all.equal(V %*% t(V), tcrossprod(V)),
+    all.equal(V %*% t(V), tcrossprod(V, V))
+)
+microbenchmark(
+    V %*% t(V),
+    tcrossprod(V),
+    tcrossprod(V, V)
+)
+# Unit: microseconds
+#              expr   min     lq    mean median     uq    max neval
+#        V %*% t(V) 2.293 2.6335 2.94673 2.7375 2.9060 19.592   100
+#     tcrossprod(V) 1.148 1.2475 1.86173 1.3440 1.4650 30.688   100
+#  tcrossprod(V, V) 1.003 1.1575 1.28451 1.2400 1.3685  2.742   100
+```
+
+## Using `Rprof()` for performance.
+The standart method for profiling where an algorithm is spending its time is with `Rprof()`.
+```R
+path <- '../tmp/R.prof' # path to profiling file
+Rprof(path)
+cve.res <- cve.call(X, Y, k = k)
+Rprof(NULL)
+(prof <- summaryRprof(path)) # Summarise results
+```
+**Note: considure to run `gc()` before measuring**, aka cleaning up by explicitely calling the garbage collector.
--- a/runtime_test.R
+++ b/runtime_test.R
@ -7,9 +7,11 @@ tell.user <- function(name, start.time, i, length) {
        i, "/", length,
        " - elapsed:", format(Sys.time() - start.time), "\033[K")
 }
-
-library(CVE) # load CVE
-source("CVE_legacy/function_script.R") # Source legacy code
+subspace.dist <- function(B1, B2){
+    P1 <- B1 %*% solve(t(B1) %*% B1) %*% t(B1)
+    P2 <- B2 %*% solve(t(B2) %*% B2) %*% t(B2)
+    return(norm(P1 - P2, type = 'F'))
+}

 # Number of simulations
 SIM.NR <- 50
@ -20,8 +22,13 @@ ATTEMPTS <- 10
 # set names of datasets
 dataset.names <- c("M1", "M2", "M3", "M4", "M5")
 # Set used CVE method
-# methods <- c("legacy", "simple", "sgd")
-methods <- c("legacy", "simple", "sgd")
+methods <- c("simple") #, "sgd") # "legacy"
+
+library(CVE) # load CVE
+if ("legacy" %in% methods) {
+    # Source legacy code (but only if needed)
+    source("CVE_legacy/function_script.R")
+}

 # Setup error and time tracking variables
 error <- matrix(NA, SIM.NR, length(methods) * length(dataset.names))
@ -79,7 +86,7 @@ for (sim in 1:SIM.NR) {
            }

            key <- paste0(name, '-', method)
-            error[sim, key] <- subspace_dist(dr$B, ds$B) / sqrt(2 * truedim)
+            error[sim, key] <- subspace.dist(dr$B, ds$B) / sqrt(2 * truedim)
            time[sim, key] <- dr.time["elapsed"]

            # Log results to file (mostly for long running simulations)