wip: writing C gradient version

parent 47917fe0bd
commit 4d2651fe8a
@@ -1,88 +1,71 @@
-# -----------------------------------------------------------------------------
-# Program: runtime_test.R
-# Author: Loki
-# Date: 2019.08.12
-#
-# Purpose:
-# Comparing runtime of "MAVE" with "CVE".
-#
-# RevisionHistory:
-# Loki -- 2019.08.12 initial creation
-# -----------------------------------------------------------------------------
-
-# load CVE package
-library(CVE)
-# load MAVE package for comparison
-library(MAVE)
-
-# set nr of simulations per dataset
-nr.sim <- 10
-
-# set names of datasets to run tests on
-dataset.names <- c("M1", "M2", "M3", "M4", "M5")
-
-#' Orthogonal projection to sub-space spanned by `B`
-#'
-#' @param B Matrix
-#' @return Orthogonal Projection Matrix
-proj <- function(B) {
-    B %*% solve(t(B) %*% B) %*% t(B)
-}
-
-#' Compute nObs given dataset dimension \code{n}.
-#'
-#' @param n Number of samples
-#' @return Numeric estimate of \code{nObs}
-nObs <- function (n) { n^0.5 }
-
-## prepare "logging"
-# result error, time, ... data.frame's
-error <- matrix(nrow = nr.sim, ncol = 2 * length(dataset.names))
-time <- matrix(nrow = nr.sim, ncol = 2 * length(dataset.names))
-# convert to data.frames
-error <- as.data.frame(error)
-time <- as.data.frame(time)
-# set names
-names(error) <- kronecker(c("CVE.", "MAVE."), dataset.names, paste0)
-names(time) <- kronecker(c("CVE.", "MAVE."), dataset.names, paste0)
-
-# get current time
-start.time <- Sys.time()
-## main comparison loop (iterate `nr.sim` times for each dataset)
-for (i in seq_along(dataset.names)) {
-    for (j in 1:nr.sim) {
-        name <- dataset.names[i]
-        # reporting progress
-        cat("\rRunning Test (", name, j , "):",
-            (i - 1) * nr.sim + j, "/", length(dataset.names) * nr.sim,
-            " - Time since start:", format(Sys.time() - start.time), "\033[K")
-        # sample new dataset
-        ds <- dataset(name)
-        k <- ncol(ds$B) # real dim
-        data <- data.frame(X = ds$X, Y = ds$Y)
-        # call CVE
-        cve.time <- system.time(
-            cve.res <- cve(Y ~ .,
-                           data = data,
-                           method = "simple",
-                           k = k)
-        )
-        # call MAVE
-        mave.time <- system.time(
-            mave.res <- mave(Y ~ .,
-                             data = data,
-                             method = "meanMAVE")
-        )
-        # compute real and approximated sub-space projections
-        P <- proj(ds$B) # real
-        P.cve <- proj(cve.res$B)
-        P.mave <- proj(mave.res$dir[[k]])
-        # compute (and store) errors
-        error[j, paste0("CVE.", name)] <- norm(P - P.cve, 'F') / sqrt(2 * k)
-        error[j, paste0("MAVE.", name)] <- norm(P - P.mave, 'F') / sqrt(2 * k)
-        # store run-times
-        time[j, paste0("CVE.", name)] <- cve.time["elapsed"]
-        time[j, paste0("MAVE.", name)] <- mave.time["elapsed"]
-    }
-}
+# Usage:
+# ~$ Rscript runtime_test.R
+
+library(CVE) # load CVE
+
+#' Writes progress to console.
+tell.user <- function(name, start.time, i, length) {
+    cat("\rRunning Test (", name, "):",
+        i, "/", length,
+        " - elapsed:", format(Sys.time() - start.time), "\033[K")
+}
+subspace.dist <- function(B1, B2){
+    P1 <- B1 %*% solve(t(B1) %*% B1) %*% t(B1)
+    P2 <- B2 %*% solve(t(B2) %*% B2) %*% t(B2)
+    return(norm(P1 - P2, type = 'F'))
+}
+
+# Number of simulations
+SIM.NR <- 50
+# maximal number of iterations in curvilinear search algorithm
+MAXIT <- 50
+# number of arbitrary starting values for curvilinear optimization
+ATTEMPTS <- 10
+# set names of datasets
+dataset.names <- c("M1", "M2", "M3", "M4", "M5")
+# Set used CVE method
+methods <- c("simple") # c("legacy", "simple", "sgd", "linesearch")
+
+# Setup error and time tracking variables
+error <- matrix(NA, SIM.NR, length(methods) * length(dataset.names))
+time <- matrix(NA, SIM.NR, ncol(error))
+colnames(error) <- kronecker(paste0(dataset.names, '-'), methods, paste0)
+colnames(time) <- colnames(error)
+
+# only for telling user (to stdout)
+count <- 0
+start.time <- Sys.time()
+# Start simulation loop.
+for (sim in 1:SIM.NR) {
+    # Repeat for each dataset.
+    for (name in dataset.names) {
+        count <- count + 1
+        tell.user(name, start.time, count, SIM.NR * length(dataset.names))
+
+        # Create a new dataset
+        ds <- dataset(name)
+        # Prepare X, Y and combine to data matrix
+        Y <- ds$Y
+        X <- ds$X
+        data <- cbind(Y, X)
+        # get dimensions
+        dim <- ncol(X)
+        truedim <- ncol(ds$B)
+
+        for (method in methods) {
+            dr.time <- system.time(
+                dr <- cve.call(X, Y,
+                               method = method,
+                               k = truedim,
+                               attempts = ATTEMPTS
+                )
+            )
+            dr <- dr[[truedim]]
+
+            key <- paste0(name, '-', method)
+            error[sim, key] <- subspace.dist(dr$B, ds$B) / sqrt(2 * truedim)
+            time[sim, key] <- dr.time["elapsed"]
+        }
+    }
+}
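The error measure used above, `subspace.dist`, is the Frobenius distance between the orthogonal projections onto the column spaces of two bases; dividing by `sqrt(2 * truedim)` scales it to [0, 1] for equal-rank subspaces. A small standalone check of that behaviour (the matrices below are made up purely for illustration):

```R
subspace.dist <- function(B1, B2){
    P1 <- B1 %*% solve(t(B1) %*% B1) %*% t(B1)
    P2 <- B2 %*% solve(t(B2) %*% B2) %*% t(B2)
    return(norm(P1 - P2, type = 'F'))
}

B1 <- matrix(c(1, 0, 0, 0,
               0, 1, 0, 0), 4, 2)        # basis of a 2D subspace of R^4
B2 <- B1 %*% matrix(c(1, 1, 0, 1), 2, 2) # same subspace, different basis
B3 <- matrix(c(0, 0, 1, 0,
               0, 0, 0, 1), 4, 2)        # basis of the orthogonal complement

subspace.dist(B1, B2) / sqrt(2 * 2)  # ~0: identical subspaces
subspace.dist(B1, B3) / sqrt(2 * 2)  # 1:  orthogonal subspaces
```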
@@ -91,16 +74,16 @@ print(colMeans(time))
 cat("\n## Error Means:\n")
 print(colMeans(error))
 
-len <- length(dataset.names)
+at <- seq(ncol(error)) + rep(seq(ncol(error) / length(methods)) - 1, each = length(methods))
-boxplot(as.matrix(error),
-        main = paste0("Error (nr.sim = ", nr.sim, ")"),
-        ylab = expression(error == group("||", P[B] - P[hat(B)], "||")[F] / sqrt(2*k)),
+boxplot(error,
+        main = paste0("Error (Nr of simulations ", SIM.NR, ")"),
+        ylab = "Error",
         las = 2,
-        at = c(1:len, 1:len + len + 1)
+        at = at
 )
-boxplot(as.matrix(time),
-        main = paste0("Time (nr.sim = ", nr.sim, ")"),
-        ylab = "time [sec]",
+boxplot(time,
+        main = paste0("Time (Nr of simulations ", SIM.NR, ")"),
+        ylab = "Time [sec]",
         las = 2,
-        at = c(1:len, 1:len + len + 1)
+        at = at
 )
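The `at` positions simply group the per-method boxes by dataset, leaving one empty slot between datasets. For instance, with the five datasets and two hypothetical methods:

```R
dataset.names <- c("M1", "M2", "M3", "M4", "M5")
methods <- c("simple", "sgd")   # two methods, purely for illustration
n.col <- length(methods) * length(dataset.names)

at <- seq(n.col) + rep(seq(n.col / length(methods)) - 1, each = length(methods))
at
# [1]  1  2  4  5  7  8 10 11 13 14   -> method pairs per dataset, gap in between
```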
@@ -1,4 +1,4 @@
-Package: CVE
+Package: CVEpureR
 Type: Package
 Title: Conditional Variance Estimator for Sufficient Dimension Reduction
 Version: 0.1
@@ -2,7 +2,6 @@
 S3method(plot,cve)
 S3method(summary,cve)
-export(col.pair.apply)
 export(cve)
 export(cve.call)
 export(cve.grid.search)
@@ -15,7 +14,6 @@ export(estimate.bandwidth)
 export(grad)
 export(null)
 export(rStiefl)
-export(row.pair.apply)
 import(stats)
 importFrom(graphics,lines)
 importFrom(graphics,plot)
CVE_R/R/CVE.R | 124
@@ -1,3 +1,17 @@
+#' Conditional Variance Estimator (CVE)
+#'
+#' Conditional Variance Estimator for Sufficient Dimension
+#' Reduction
+#'
+#' TODO: And some details
+#'
+#'
+#' @references Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
+#'
+#' @docType package
+#' @author Loki
+"_PACKAGE"
+
 #' Implementation of the CVE method.
 #'
 #' Conditional Variance Estimator (CVE) is a novel sufficient dimension
@@ -40,7 +54,7 @@
 #' @import stats
 #' @importFrom stats model.frame
 #' @export
-cve <- function(formula, data, method = "simple", ...) {
+cve <- function(formula, data, method = "simple", max.dim = 10, ...) {
     # check for type of `data` if supplied and set default
     if (missing(data)) {
         data <- environment(formula)
@@ -69,12 +83,8 @@ cve <- function(formula, data, method = "simple", ...) {
 #' @param ... Method specific parameters.
 #' @rdname cve
 #' @export
-cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5, k, ...) {
-    # TODO: replace default value of `k` by `max.dim` when fast enough
-    if (missing(k)) {
-        stop("TODO: parameter `k` (rank(B)) is required, replace by `max.dim`.")
-    }
-
+cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5,
+                     min.dim = 1, max.dim = 10, k, ...) {
     # parameter checking
     if (!is.matrix(X)) {
@@ -90,21 +100,42 @@ cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5, k, ...) {
         stop('X is one dimensional, no need for dimension reduction.')
     }
 
+    if (!missing(k)) {
+        min.dim <- as.integer(k)
+        max.dim <- as.integer(k)
+    } else {
+        min.dim <- as.integer(min.dim)
+        max.dim <- as.integer(min(max.dim, ncol(X) - 1L))
+    }
+    if (min.dim > max.dim) {
+        stop('`min.dim` bigger `max.dim`.')
+    }
+    if (max.dim >= ncol(X)) {
+        stop('`max.dim` must be smaller than `ncol(X)`.')
+    }
+
     # Call specified method.
     method <- tolower(method)
-    if (method == 'simple') {
-        dr <- cve_simple(X, Y, k, nObs = nObs, ...)
-    } else if (method == 'linesearch') {
-        dr <- cve_linesearch(X, Y, k, nObs = nObs, ...)
-    } else if (method == 'sgd') {
-        dr <- cve_sgd(X, Y, k, nObs = nObs, ...)
-    } else {
-        stop('Got unknown method.')
+    call <- match.call()
+    dr <- list()
+    for (k in min.dim:max.dim) {
+        if (method == 'simple') {
+            dr.k <- cve_simple(X, Y, k, nObs = nObs, ...)
+        } else if (method == 'linesearch') {
+            dr.k <- cve_linesearch(X, Y, k, nObs = nObs, ...)
+        } else if (method == 'sgd') {
+            dr.k <- cve_sgd(X, Y, k, nObs = nObs, ...)
+        } else {
+            stop('Got unknown method.')
+        }
+        dr.k$k <- k
+        class(dr.k) <- "cve.k"
+        dr[[k]] <- dr.k
     }
 
     # augment result information
-    dr$method <- method
-    dr$call <- match.call()
+    dr.k$method <- method
+    dr.k$call <- call
     class(dr) <- "cve"
     return(dr)
 }
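Because `cve.call` now fits every dimension from `min.dim` to `max.dim` and stores the per-dimension results as `"cve.k"` objects in a list, a single dimension is picked out by list index, exactly as the runtime test does with `dr[[truedim]]`. A minimal usage sketch (the `dataset()` helper and the `$B` field are taken from their use elsewhere in this commit):

```R
library(CVEpureR)

ds <- dataset("M1")    # simulated dataset shipped with the package
k  <- ncol(ds$B)       # true reduction dimension

# Passing `k` forces min.dim == max.dim == k, so only one sub-model is fitted.
dr   <- cve.call(ds$X, ds$Y, method = "simple", k = k)
dr.k <- dr[[k]]        # the "cve.k" object for this dimension

str(dr.k$B)            # estimated basis of the SDR subspace (null space of V.best)
```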
@@ -134,41 +165,42 @@ cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5, k, ...) {
 #' @export
 plot.cve <- function(x, ...) {
-    H <- x$history
-    H_1 <- H[!is.na(H[, 1]), 1]
-
-    defaults <- list(
-        main = "History",
-        xlab = "Iterations i",
-        ylab = expression(loss == L[n](V^{(i)})),
-        xlim = c(1, nrow(H)),
-        ylim = c(0, max(H)),
-        type = "l"
-    )
-
-    call.plot <- match.call()
-    keys <- names(defaults)
-    keys <- keys[match(keys, names(call.plot)[-1], nomatch = 0) == 0]
-
-    for (key in keys) {
-        call.plot[[key]] <- defaults[[key]]
-    }
-
-    call.plot[[1L]] <- quote(plot)
-    call.plot$x <- quote(1:length(H_1))
-    call.plot$y <- quote(H_1)
-
-    eval(call.plot)
-
-    if (ncol(H) > 1) {
-        for (i in 2:ncol(H)) {
-            H_i <- H[H[, i] > 0, i]
-            lines(1:length(H_i), H_i)
-        }
-    }
-    x.ends <- apply(H, 2, function(h) { length(h[!is.na(h)]) })
-    y.ends <- apply(H, 2, function(h) { tail(h[!is.na(h)], n=1) })
-    points(x.ends, y.ends)
+    # (the entire previous body is kept, commented out line by line)
 }
 
 #' Prints a summary of a \code{cve} result.
@@ -13,18 +13,14 @@ cve_linesearch <- function(X, Y, k,
                            slack = 0,
                            epochs = 50L,
                            attempts = 10L,
-                           max.linesearch.iter = 10L
+                           max.linesearch.iter = 10L,
+                           logger = NULL
                     ) {
     # Set `grad` functions environment to enable if to find this environments
     # local variabels, needed to enable the manipulation of this local variables
     # from within `grad`.
     environment(grad) <- environment()
 
-    # Setup histories.
-    loss.history <- matrix(NA, epochs, attempts)
-    error.history <- matrix(NA, epochs, attempts)
-    tau.history <- matrix(NA, epochs, attempts)
-
     # Get dimensions.
     n <- nrow(X)
     p <- ncol(X)
@@ -44,8 +40,8 @@ cve_linesearch <- function(X, Y, k,
     # Compute lookup indexes for symmetrie, lower/upper
     # triangular parts and vectorization.
     pair.index <- elem.pairs(seq(n))
-    i <- pair.index[, 1] # `i` indices of `(i, j)` pairs
-    j <- pair.index[, 2] # `j` indices of `(i, j)` pairs
+    i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
+    j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
     # Matrix of vectorized indices. (vec(index) -> seq)
     index <- matrix(seq(n * n), n, n)
     lower <- index[lower.tri(index)]
@@ -73,6 +69,13 @@ cve_linesearch <- function(X, Y, k,
         # Set last loss (aka, loss after applying the step).
         loss.last <- loss
 
+        # Call logger with initial values before starting optimization.
+        if (is.function(logger)) {
+            epoch <- 0 # Set epoch count to 0 (only relevant for logging).
+            error <- NA
+            logger(environment())
+        }
+
         ## Start optimization loop.
         for (epoch in 1:epochs) {
 
@@ -124,16 +127,16 @@ cve_linesearch <- function(X, Y, k,
             # Compute error.
             error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
 
-            # Write history.
-            loss.history[epoch, attempt] <- loss
-            error.history[epoch, attempt] <- error
-            tau.history[epoch, attempt] <- tau
-
             # Check break condition (epoch check to skip ignored gradient calc).
             # Note: the devision by `sqrt(2 * k)` is included in `tol`.
             if (error < tol | epoch >= epochs) {
                 # take last step and stop optimization.
                 V <- V.tau
+                # Final call to the logger before stopping optimization
+                if (is.function(logger)) {
+                    G <- G.tau
+                    logger(environment())
+                }
                 break()
             }
 
@@ -141,6 +144,12 @@ cve_linesearch <- function(X, Y, k,
             V <- V.tau
             loss.last <- loss
             G <- G.tau
 
+            # Log after taking current step.
+            if (is.function(logger)) {
+                logger(environment())
+            }
+
         }
 
         # Check if current attempt improved previous ones
@@ -152,9 +161,6 @@ cve_linesearch <- function(X, Y, k,
     }
 
     return(list(
-        loss.history = loss.history,
-        error.history = error.history,
-        tau.history = tau.history,
         loss = loss.best,
         V = V.best,
         B = null(V.best),
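All three method implementations now expose the same `logger` hook: the function is handed the optimizer's own environment once before the first epoch (with `epoch == 0` and `error == NA`) and again after every accepted step, so locals such as `loss`, `error`, `tau`, `epoch` and `attempt` can be recorded from outside. A minimal sketch that only tracks the loss (a fuller example is added by this commit to notes.md and the `logging` script further below):

```R
epochs   <- 50L
attempts <- 10L
loss.history <- matrix(NA, epochs + 1, attempts)

logger <- function(env) {
    # Note the `<<-` assignment: it writes into the matrix defined outside.
    loss.history[env$epoch + 1, env$attempt] <<- env$loss
}

# The hook is passed through `cve()` / `cve.call()` to the method, e.g.:
# dr <- cve.call(X, Y, method = "linesearch", k = 2, logger = logger)
```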
@@ -10,17 +10,14 @@ cve_sgd <- function(X, Y, k,
                     tol = 1e-3,
                     epochs = 50L,
                     batch.size = 16L,
-                    attempts = 10L
+                    attempts = 10L,
+                    logger = NULL
                     ) {
     # Set `grad` functions environment to enable if to find this environments
     # local variabels, needed to enable the manipulation of this local variables
     # from within `grad`.
     environment(grad) <- environment()
 
-    # Setup histories.
-    loss.history <- matrix(NA, epochs, attempts)
-    error.history <- matrix(NA, epochs, attempts)
-
     # Get dimensions.
     n <- nrow(X) # Number of samples.
     p <- ncol(X) # Data dimensions
@@ -32,7 +29,7 @@ cve_sgd <- function(X, Y, k,
     tol <- sqrt(2 * q) * tol
 
     # Estaimate bandwidth if not given.
-    if (missing(h) | !is.numeric(h)) {
+    if (missing(h) || !is.numeric(h)) {
         h <- estimate.bandwidth(X, k, nObs)
     }
 
@@ -40,12 +37,11 @@ cve_sgd <- function(X, Y, k,
     # Compute lookup indexes for symmetrie, lower/upper
     # triangular parts and vectorization.
     pair.index <- elem.pairs(seq(n))
-    i <- pair.index[, 1] # `i` indices of `(i, j)` pairs
-    j <- pair.index[, 2] # `j` indices of `(i, j)` pairs
-    # Matrix of vectorized indices. (vec(index) -> seq)
-    index <- matrix(seq(n * n), n, n)
-    lower <- index[lower.tri(index)]
-    upper <- t(index)[lower]
+    i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
+    j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
+    # Index of vectorized matrix, for lower and upper triangular part.
+    lower <- ((i - 1) * n) + j
+    upper <- ((j - 1) * n) + i
 
     # Create all pairewise differences of rows of `X`.
     X_diff <- X[i, , drop = F] - X[j, , drop = F]
@@ -69,17 +65,23 @@ cve_sgd <- function(X, Y, k,
         # Keep track of last `V` for computing error after an epoch.
         V.last <- V
 
+        if (is.function(logger)) {
+            loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
+            error <- NA
+            epoch <- 0
+            logger(environment())
+        }
+
         # Repeat `epochs` times
         for (epoch in 1:epochs) {
             # Shuffle batches
             batch.shuffle <- sample(indices)
 
             # Make a step for each batch.
-            for (start in seq(1, n, batch.size)) {
+            for (batch.start in seq(1, n, batch.size)) {
                 # Select batch data indices.
-                batch <- batch.shuffle[start:(start + batch.size - 1)]
-                # Remove `NA`'s (when `n` isn't a multiple of `batch.size`).
-                batch <- batch[!is.na(batch)]
+                batch.end <- min(batch.start + batch.size - 1, length(batch.shuffle))
+                batch <- batch.shuffle[batch.start:batch.end]
 
                 # Compute batch gradient.
                 loss <- NULL
@@ -93,21 +95,24 @@ cve_sgd <- function(X, Y, k,
                 # Parallet transport (on Stiefl manifold) into direction of `G`.
                 V <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)
             }
-            # Compute actuall loss after finishing optimization.
-            loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
             # And the error for the history.
             error <- norm(V.last %*% t(V.last) - V %*% t(V), type = "F")
             V.last <- V
 
-            # Finaly write history.
-            loss.history[epoch, attempt] <- loss
-            error.history[epoch, attempt] <- error
+            if (is.function(logger)) {
+                # Compute loss at end of epoch for logging.
+                loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
+                logger(environment())
+            }
 
             # Check break condition.
             if (error < tol) {
                 break()
             }
         }
+        # Compute actual loss after finishing for comparing multiple attempts.
+        loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
+
         # After each attempt, check if last attempt reached a better result.
         if (loss < loss.best) {
             loss.best <- loss
@@ -116,8 +121,6 @@ cve_sgd <- function(X, Y, k,
     }
 
     return(list(
-        loss.history = loss.history,
-        error.history = error.history,
        loss = loss.best,
        V = V.best,
        B = null(V.best),
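The rewritten batch loop clips the final batch instead of padding it with `NA`s and filtering them out afterwards. The index arithmetic in isolation, for an `n` that is not a multiple of `batch.size` (numbers chosen only for the illustration):

```R
n <- 37L
batch.size <- 16L
batch.shuffle <- sample(n)

for (batch.start in seq(1, n, batch.size)) {
    batch.end <- min(batch.start + batch.size - 1, length(batch.shuffle))
    batch <- batch.shuffle[batch.start:batch.end]
    cat("batch", batch.start, "-", batch.end, ": size", length(batch), "\n")
}
# batch 1 - 16 : size 16
# batch 17 - 32 : size 16
# batch 33 - 37 : size 5
```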
@@ -10,21 +10,18 @@ cve_simple <- function(X, Y, k,
                        tol = 1e-3,
                        slack = 0,
                        epochs = 50L,
-                       attempts = 10L
+                       attempts = 10L,
+                       logger = NULL
                     ) {
     # Set `grad` functions environment to enable if to find this environments
     # local variabels, needed to enable the manipulation of this local variables
     # from within `grad`.
     environment(grad) <- environment()
 
-    # Setup histories.
-    loss.history <- matrix(NA, epochs, attempts)
-    error.history <- matrix(NA, epochs, attempts)
-
     # Get dimensions.
-    n <- nrow(X)
-    p <- ncol(X)
-    q <- p - k
+    n <- nrow(X) # Number of samples.
+    p <- ncol(X) # Data dimensions
+    q <- p - k # Complement dimension of the SDR space.
 
     # Save initial learning rate `tau`.
     tau.init <- tau
@@ -32,7 +29,7 @@ cve_simple <- function(X, Y, k,
     tol <- sqrt(2 * q) * tol
 
     # Estaimate bandwidth if not given.
-    if (missing(h) | !is.numeric(h)) {
+    if (missing(h) || !is.numeric(h)) {
         h <- estimate.bandwidth(X, k, nObs)
     }
 
@@ -40,12 +37,11 @@ cve_simple <- function(X, Y, k,
     # Compute lookup indexes for symmetrie, lower/upper
     # triangular parts and vectorization.
     pair.index <- elem.pairs(seq(n))
-    i <- pair.index[, 1] # `i` indices of `(i, j)` pairs
-    j <- pair.index[, 2] # `j` indices of `(i, j)` pairs
-    # Matrix of vectorized indices. (vec(index) -> seq)
-    index <- matrix(seq(n * n), n, n)
-    lower <- index[lower.tri(index)]
-    upper <- t(index)[lower]
+    i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
+    j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
+    # Index of vectorized matrix, for lower and upper triangular part.
+    lower <- ((i - 1) * n) + j
+    upper <- ((j - 1) * n) + i
 
     # Create all pairewise differences of rows of `X`.
     X_diff <- X[i, , drop = F] - X[j, , drop = F]
@@ -58,8 +54,7 @@ cve_simple <- function(X, Y, k,
 
     # Start loop for multiple attempts.
     for (attempt in 1:attempts) {
-        # reset step width `tau`
+        # Reset learning rate `tau`.
         tau <- tau.init
 
         # Sample a `(p, q)` dimensional matrix from the stiefel manifold as
@@ -75,6 +70,13 @@ cve_simple <- function(X, Y, k,
         # Cayley transform matrix `A`
         A <- (G %*% t(V)) - (V %*% t(G))
 
+        # Call logger with initial values before starting optimization.
+        if (is.function(logger)) {
+            epoch <- 0 # Set epoch count to 0 (only relevant for logging).
+            error <- NA
+            logger(environment())
+        }
+
         ## Start optimization loop.
         for (epoch in 1:epochs) {
             # Apply learning rate `tau`.
@@ -85,7 +87,7 @@ cve_simple <- function(X, Y, k,
             # Loss at position after a step.
             loss <- grad(X, Y, V.tau, h, loss.only = TRUE, persistent = TRUE)
 
-            # Check if step is appropriate
+            # Check if step is appropriate, iff not reduce learning rate.
             if ((loss - loss.last) > slack * loss.last) {
                 tau <- tau / 2
                 next() # Keep position and try with smaller `tau`.
@@ -94,15 +96,15 @@ cve_simple <- function(X, Y, k,
             # Compute error.
             error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
 
-            # Write history.
-            loss.history[epoch, attempt] <- loss
-            error.history[epoch, attempt] <- error
-
             # Check break condition (epoch check to skip ignored gradient calc).
             # Note: the devision by `sqrt(2 * k)` is included in `tol`.
-            if (error < tol | epoch >= epochs) {
+            if (error < tol || epoch >= epochs) {
                 # take last step and stop optimization.
                 V <- V.tau
+                # Call logger last time befor stoping.
+                if (is.function(logger)) {
+                    logger(environment())
+                }
                 break()
             }
 
@@ -110,6 +112,11 @@ cve_simple <- function(X, Y, k,
             V <- V.tau
             loss.last <- loss
 
+            # Call logger after taking a step.
+            if (is.function(logger)) {
+                logger(environment())
+            }
+
             # Compute gradient at new position.
             G <- grad(X, Y, V, h, persistent = TRUE)
 
@@ -126,8 +133,6 @@ cve_simple <- function(X, Y, k,
     }
 
     return(list(
-        loss.history = loss.history,
-        error.history = error.history,
        loss = loss.best,
        V = V.best,
        B = null(V.best),
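Both `cve_simple` and `cve_sgd` step along the Stiefel manifold by applying a Cayley transform of the skew-symmetric matrix `A = G %*% t(V) - V %*% t(G)`, which is what keeps the columns of `V` orthonormal after every update. A standalone numerical check of that property (random `V` and `G`; the exact scaling used to form `A.tau` inside the methods is not shown in these hunks, so `0.1 * A` below is only an assumption):

```R
set.seed(1)
p <- 6; q <- 2
V <- qr.Q(qr(matrix(rnorm(p * q), p, q)))  # orthonormal columns: t(V) %*% V == I
G <- matrix(rnorm(p * q), p, q)            # stand-in for the gradient

A     <- (G %*% t(V)) - (V %*% t(G))       # skew-symmetric by construction
I_p   <- diag(p)
A.tau <- 0.1 * A                           # assumed step scaling

V.tau <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)

max(abs(t(V.tau) %*% V.tau - diag(q)))     # ~1e-16: still on the Stiefel manifold
```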
@@ -24,12 +24,11 @@ grad <- function(X, Y, V, h,
         # Compute lookup indexes for symmetrie, lower/upper
         # triangular parts and vectorization.
         pair.index <- elem.pairs(seq(n))
-        i <- pair.index[, 1] # `i` indices of `(i, j)` pairs
-        j <- pair.index[, 2] # `j` indices of `(i, j)` pairs
-        # Matrix of vectorized indices. (vec(index) -> seq)
-        index <- matrix(seq(n * n), n, n)
-        lower <- index[lower.tri(index)]
-        upper <- t.default(index)[lower]
+        i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
+        j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
+        # Index of vectorized matrix, for lower and upper triangular part.
+        lower <- ((i - 1) * n) + j
+        upper <- ((j - 1) * n) + i
 
         # Create all pairewise differences of rows of `X`.
         X_diff <- X[i, , drop = F] - X[j, , drop = F]
@@ -39,7 +38,7 @@ grad <- function(X, Y, V, h,
     Q <- diag(1, p) - tcrossprod(V, V)
 
     # Vectorized distance matrix `D`.
-    vecD <- rowSums((X_diff %*% Q)^2)
+    vecD <- colSums(tcrossprod(Q, X_diff)^2)
 
     # Weight matrix `W` (dnorm ... gaussean density function)
     W <- matrix(1, n, n) # `exp(0) == 1`
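The new `vecD` expression computes the same vector as before: `tcrossprod(Q, X_diff)` is `Q %*% t(X_diff)`, and since `Q` is symmetric its squared column sums coincide with the squared row sums of `X_diff %*% Q`. A quick check outside the package:

```R
set.seed(42)
n <- 10; p <- 4; q <- 2
X_diff <- matrix(rnorm(n * p), n, p)
V <- qr.Q(qr(matrix(rnorm(p * q), p, q)))
Q <- diag(1, p) - tcrossprod(V, V)          # symmetric projection, as in grad()

all.equal(rowSums((X_diff %*% Q)^2),        # old formulation
          colSums(tcrossprod(Q, X_diff)^2)) # new formulation -> TRUE
```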
@@ -22,42 +22,19 @@ null <- function(V) {
     return(qr.Q(tmp, complete=TRUE)[, set, drop=FALSE])
 }
 
-#' Creates a (numeric) matrix where each row contains
+#' Creates a (numeric) matrix where each column contains
 #' an element to element matching.
 #' @param elements numeric vector of elements to match
-#' @return matrix of size `(n * (n - 1) / 2, 2)` for a argument of lenght `n`.
+#' @return matrix of size `(2, n * (n - 1) / 2)` for a argument of lenght `n`.
 #' @keywords internal
+#' @examples
+#' elem.pairs(seq.int(2, 5))
 #' @export
 elem.pairs <- function(elements) {
     # Number of elements to match.
     n <- length(elements)
     # Create all combinations.
-    pairs <- cbind(rep(elements, each=n), rep(elements, n))
+    pairs <- rbind(rep(elements, each=n), rep(elements, n))
     # Select unique combinations without self interaction.
-    return(pairs[pairs[, 1] < pairs[, 2], ])
+    return(pairs[, pairs[1, ] < pairs[2, ]])
 }
-
-#' Apply function to pairs of matrix rows or columns.
-#'
-#' \code{row.pair.apply} applies \code{FUN} to each unique row pair without self
-#' interaction while \code{col.pair.apply} does the same for columns.
-#' @param X Matrix.
-#' @param FUN Function to apply to each pair.
-#' @examples
-#' X <- matrix(seq(12), 4, 3)
-#' # matrix containing all row to row differences.
-#' row.pair.apply(X, `-`)
-#' @aliases row.pair.apply, col.pair.apply
-#' @keywords internal
-#' @export
-row.pair.apply <- function(X, FUN) {
-    pairs <- elem.pairs(seq(nrow(X)))
-    return(FUN(X[pairs[, 1], ], X[pairs[, 2], ]))
-}
-#' @rdname row.pair.apply
-#' @keywords internal
-#' @export
-col.pair.apply <- function(X, FUN) {
-    pairs <- elem.pairs(seq(ncol(X)))
-    return(FUN(X[, pairs[, 1]], X[, pairs[, 2]]))
-}
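With the transposed layout, `pair.index[1, ]` and `pair.index[2, ]` give the `i < j` pairs directly, and the linear indices `((i - 1) * n) + j` and `((j - 1) * n) + i` used in the optimizer files address exactly the lower and upper triangular entries of an `n x n` matrix in column-major order, i.e. the same values the previous `lower.tri`-based lookup produced. A consistency check using the new implementation:

```R
elem.pairs <- function(elements) {   # copy of the new implementation above
    n <- length(elements)
    pairs <- rbind(rep(elements, each = n), rep(elements, n))
    pairs[, pairs[1, ] < pairs[2, ]]
}

n <- 5
pair.index <- elem.pairs(seq(n))
i <- pair.index[1, ]
j <- pair.index[2, ]

index <- matrix(seq(n * n), n, n)
all(((i - 1) * n) + j == index[lower.tri(index)])     # TRUE
all(((j - 1) * n) + i == t(index)[lower.tri(index)])  # TRUE
```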
@@ -0,0 +1,2 @@ (new file)
runtime_test    Runtime comparison of CVE against MAVE for M1 - M5 datasets.
logging         Example of a logger function for cve algorithm analysis.
@@ -0,0 +1,43 @@ (new file: the `logging` example script)
Apart from its first line, `library(CVEpureR)`, and the comment "# Performe SDR for ONE `k`.", this script is line for line the logger example that this commit also adds to notes.md below: the `epochs + 1` by `attempts` history matrices for loss, error, tau and true error, the `dataset("M1")` setup, a `logger(env)` that records `env$loss`, `env$error`, `env$tau` and the true projection error via `<<-`, the call `cve(Y ~ X, k = k, logger = logger, epochs = epochs, attempts = attempts)`, and the four `matplot` panels.
@@ -0,0 +1,89 @@ (new file)
The same rewritten runtime test as reconstructed in the first two hunks above, including the colMeans/boxplot summary section, except that it loads `CVEpureR` instead of `CVE`.
@@ -0,0 +1,20 @@ (new file)
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/CVE.R
\docType{package}
\name{CVEpureR-package}
\alias{CVEpureR}
\alias{CVEpureR-package}
\title{Conditional Variance Estimator (CVE)}
\description{
Conditional Variance Estimator for Sufficient Dimension
Reduction
}
\details{
TODO: And some details
}
\references{
Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
}
\author{
Loki
}
@@ -5,9 +5,10 @@
 \alias{cve.call}
 \title{Implementation of the CVE method.}
 \usage{
-cve(formula, data, method = "simple", ...)
+cve(formula, data, method = "simple", max.dim = 10, ...)
 
-cve.call(X, Y, method = "simple", nObs = nrow(X)^0.5, k, ...)
+cve.call(X, Y, method = "simple", nObs = nrow(X)^0.5, min.dim = 1,
+  max.dim = 10, k, ...)
 }
 \arguments{
 \item{formula}{Formel for the regression model defining `X`, `Y`.
@@ -7,7 +7,7 @@ conditions.}
 \usage{
 cve_linesearch(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 1,
   tol = 0.001, rho1 = 0.1, rho2 = 0.9, slack = 0, epochs = 50L,
-  attempts = 10L, max.linesearch.iter = 10L)
+  attempts = 10L, max.linesearch.iter = 10L, logger = NULL)
 }
 \description{
 Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe
@@ -6,7 +6,8 @@
 a classic GD method unsing no further tricks.}
 \usage{
 cve_sgd(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 0.01,
-  tol = 0.001, epochs = 50L, batch.size = 16L, attempts = 10L)
+  tol = 0.001, epochs = 50L, batch.size = 16L, attempts = 10L,
+  logger = NULL)
 }
 \description{
 Simple implementation of the CVE method. 'Simple' means that this method is
@@ -6,7 +6,8 @@
 a classic GD method unsing no further tricks.}
 \usage{
 cve_simple(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 1,
-  tol = 0.001, slack = 0, epochs = 50L, attempts = 10L)
+  tol = 0.001, slack = 0, epochs = 50L, attempts = 10L,
+  logger = NULL)
 }
 \description{
 Simple implementation of the CVE method. 'Simple' means that this method is
@@ -2,7 +2,7 @@
 % Please edit documentation in R/util.R
 \name{elem.pairs}
 \alias{elem.pairs}
-\title{Creates a (numeric) matrix where each row contains
+\title{Creates a (numeric) matrix where each column contains
 an element to element matching.}
 \usage{
 elem.pairs(elements)
@@ -11,10 +11,13 @@ elem.pairs(elements)
 \item{elements}{numeric vector of elements to match}
 }
 \value{
-matrix of size `(n * (n - 1) / 2, 2)` for a argument of lenght `n`.
+matrix of size `(2, n * (n - 1) / 2)` for a argument of lenght `n`.
 }
 \description{
-Creates a (numeric) matrix where each row contains
+Creates a (numeric) matrix where each column contains
 an element to element matching.
 }
+\examples{
+elem.pairs(seq.int(2, 5))
+}
 \keyword{internal}
@@ -1,27 +0,0 @@ (file deleted)
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/util.R
-\name{row.pair.apply}
-\alias{row.pair.apply}
-\alias{row.pair.apply,}
-\alias{col.pair.apply}
-\title{Apply function to pairs of matrix rows or columns.}
-\usage{
-row.pair.apply(X, FUN)
-
-col.pair.apply(X, FUN)
-}
-\arguments{
-\item{X}{Matrix.}
-
-\item{FUN}{Function to apply to each pair.}
-}
-\description{
-\code{row.pair.apply} applies \code{FUN} to each unique row pair without self
-interaction while \code{col.pair.apply} does the same for columns.
-}
-\examples{
-X <- matrix(seq(12), 4, 3)
-# matrix containing all row to row differences.
-row.pair.apply(X, `-`)
-}
-\keyword{internal}
notes.md | 109
@@ -1,3 +1,12 @@
+# General Notes for Souce Code analysis
+## Search in multiple files.
+Using the Linux `grep` program with the parameters `-rnw` and specifying a include files filter like the following example.
+```bash
+grep --include=*\.{c,h,R} -rnw '.' -e "sweep"
+```
+searches in all `C` source and header fils as well as `R` source files for the term _sweep_.
+
+# Development
 ## Build and install.
 To build the package the `devtools` package is used. This also provides `roxygen2` which is used for documentation and authomatic creaton of the `NAMESPACE` file.
 ```R
@@ -29,6 +38,55 @@ install.packages(path, repos = NULL, type = "source")
 ```
 **Note: I only recommend this approach during development.**
 
+# Analysing
+## Logging (a `cve` run).
+To log `loss`, `error` (estimated) the true error (error of current estimated `B` against the true `B`) or even the stepsize one can use the `logger` parameter. A `logger` is a function that gets the current `environment` of the CVE optimization methods (__do not alter this environment, only read from it__). This can be used to create logs like in the following example.
+```R
+library(CVE)
+
+# Setup histories.
+(epochs <- 50)
+(attempts <- 10)
+loss.history <- matrix(NA, epochs + 1, attempts)
+error.history <- matrix(NA, epochs + 1, attempts)
+tau.history <- matrix(NA, epochs + 1, attempts)
+true.error.history <- matrix(NA, epochs + 1, attempts)
+
+# Create a dataset
+ds <- dataset("M1")
+X <- ds$X
+Y <- ds$Y
+B <- ds$B # the true `B`
+(k <- ncol(ds$B))
+
+# True projection matrix.
+P <- B %*% solve(t(B) %*% B) %*% t(B)
+# Define the logger for the `cve()` method.
+logger <- function(env) {
+    # Note the `<<-` assignement!
+    loss.history[env$epoch + 1, env$attempt] <<- env$loss
+    error.history[env$epoch + 1, env$attempt] <<- env$error
+    tau.history[env$epoch + 1, env$attempt] <<- env$tau
+    # Compute true error by comparing to the true `B`
+    B.est <- null(env$V) # Function provided by CVE
+    P.est <- B.est %*% solve(t(B.est) %*% B.est) %*% t(B.est)
+    true.error <- norm(P - P.est, 'F') / sqrt(2 * k)
+    true.error.history[env$epoch + 1, env$attempt] <<- true.error
+}
+# Performa SDR
+dr <- cve(Y ~ X, k = k, logger = logger, epochs = epochs, attempts = attempts)
+# Plot history's
+par(mfrow = c(2, 2))
+matplot(loss.history, type = 'l', log = 'y', xlab = 'iter',
+        main = 'loss', ylab = expression(L(V[iter])))
+matplot(error.history, type = 'l', log = 'y', xlab = 'iter',
+        main = 'error', ylab = 'error')
+matplot(tau.history, type = 'l', log = 'y', xlab = 'iter',
+        main = 'tau', ylab = 'tau')
+matplot(true.error.history, type = 'l', log = 'y', xlab = 'iter',
+        main = 'true error', ylab = 'true error')
+```
+
 ## Reading log files.
 The runtime tests (upcomming further tests) are creating log files saved in `tmp/`. These log files are `CSV` files (actualy `TSV`) with a header storing the test results. Depending on the test the files may contain differnt data. As an example we use the runtime test logs which store in each line the `dataset`, the used `method` as well as the `error` (actual error of estimated `B` against real `B`) and the `time`. For reading and analysing the data see the following example.
 ```R
@@ -66,7 +124,9 @@ jedi.seeks <- function() {
 }
 
 trooper.seeks()
+# [1] "These aren't the droids you're looking for."
 jedi.seeks()
+# [1] "R2-D2", "C-3PO"
 ```
 
 The next example ilustrates how to write (without local copies) to variables outside the functions local environment.
@@ -127,27 +187,25 @@ library(microbenchmark)
 
 A <- matrix(runif(120), 12, 10)
 
-# Matrix trace.
-tr <- function(M) sum(diag(M))
-
 # Check correctnes and benckmark performance.
 stopifnot(
     all.equal(
-        tr(t(A) %*% A),
-        sum(diag(t(A) %*% A)),
-        sum(A * A)
+        sum(diag(t(A) %*% A)), sum(diag(crossprod(A, A)))
+    ),
+    all.equal(
+        sum(diag(t(A) %*% A)), sum(A * A)
     )
 )
 microbenchmark(
-    tr(t(A) %*% A),
-    sum(diag(t(A) %*% A)),
-    sum(A * A)
+    MM = sum(diag(t(A) %*% A)),
+    cross = sum(diag(crossprod(A, A))),
+    elem = sum(A * A)
 )
 # Unit: nanoseconds
-#                   expr  min   lq    mean median     uq   max neval
-#         tr(t(A) %*% A) 4335 4713 5076.36 4949.5 5402.5  7928   100
-#  sum(diag(t(A) %*% A)) 4106 4429 5233.89 4733.5 5057.5 49308   100
-#             sum(A * A)  540  681  777.07  740.0  818.5  3572   100
+#   expr  min     lq    mean median     uq   max neval
+#     MM 4232 4570.0 5138.81   4737 4956.0 40308   100
+#  cross 2523 2774.5 2974.93   2946 3114.5  5078   100
+#   elem  582  762.5  973.02    834  964.0 12945   100
 ```
 
 ```R
@@ -243,6 +301,31 @@ microbenchmark(
 #  outer 1141.479 1216.929 1404.702 1317.7315 1582.800 2531.766   100
 ```
 
+### Fast dist matrix computation (aka. row sum of squares).
+```R
+library(microbenchmark)
+library(CVE)
+
+(n <- 200)
+(N <- n * (n - 1) / 2)
+(p <- 12)
+M <- matrix(runif(N * p), N, p)
+
+stopifnot(
+    all.equal(rowSums(M^2), rowSums.c(M^2)),
+    all.equal(rowSums(M^2), rowSquareSums.c(M))
+)
+microbenchmark(
+    sums = rowSums(M^2),
+    sums.c = rowSums.c(M^2),
+    sqSums.c = rowSquareSums.c(M)
+)
+# Unit: microseconds
+#      expr     min       lq      mean    median       uq      max neval
+#      sums 666.311 1051.036 1612.3100 1139.0065 1547.657 13940.97   100
+#    sums.c 342.647  672.453 1009.9109  740.6255 1224.715 13765.90   100
+#  sqSums.c 115.325  142.128  175.6242  153.4645  169.678   759.87   100
+```
+
 ## Using `Rprof()` for performance.
 The standart method for profiling where an algorithm is spending its time is with `Rprof()`.
@@ -14,7 +14,7 @@ subspace.dist <- function(B1, B2){
 }
 
 # Number of simulations
-SIM.NR <- 50
+SIM.NR <- 20
 # maximal number of iterations in curvilinear search algorithm
 MAXIT <- 50
 # number of arbitrary starting values for curvilinear optimization
@@ -22,7 +22,7 @@ ATTEMPTS <- 10
 # set names of datasets
 dataset.names <- c("M1", "M2", "M3", "M4", "M5")
 # Set used CVE method
-methods <- c("simple") #, "sgd") # "legacy"
+methods <- c("simple") # c("legacy", "simple", "sgd", "linesearch")
 
 library(CVE) # load CVE
 if ("legacy" %in% methods) {
@@ -83,6 +83,7 @@ for (sim in 1:SIM.NR) {
                 attempts = ATTEMPTS
             )
         )
+        dr <- dr[[truedim]]
     }
 
     key <- paste0(name, '-', method)
@ -0,0 +1,329 @@
|
||||||
|
library(microbenchmark)
|
||||||
|
|
||||||
|
dyn.load("wip.so")
|

## rowSum* .call --------------------------------------------------------------
rowSums.c <- function(M) {
    stopifnot(
        is.matrix(M),
        is.numeric(M)
    )
    if (!is.double(M)) {
        M <- matrix(as.double(M), nrow = nrow(M))
    }

    .Call('R_rowSums', PACKAGE = 'wip', M)
}
colSums.c <- function(M) {
    stopifnot(
        is.matrix(M),
        is.numeric(M)
    )
    if (!is.double(M)) {
        M <- matrix(as.double(M), nrow = nrow(M))
    }

    .Call('R_colSums', PACKAGE = 'wip', M)
}
rowSquareSums.c <- function(M) {
    stopifnot(
        is.matrix(M),
        is.numeric(M)
    )
    if (!is.double(M)) {
        M <- matrix(as.double(M), nrow = nrow(M))
    }

    .Call('R_rowSquareSums', PACKAGE = 'wip', M)
}
rowSumsSymVec.c <- function(vecA, nrow, diag = 0.0) {
    stopifnot(
        is.vector(vecA),
        is.numeric(vecA),
        is.numeric(diag),
        nrow * (nrow - 1) == length(vecA) * 2
    )
    if (!is.double(vecA)) {
        vecA <- as.double(vecA)
    }
    .Call('R_rowSumsSymVec', PACKAGE = 'wip',
          vecA, as.integer(nrow), as.double(diag))
}
rowSweep.c <- function(A, v, op = '-') {
    stopifnot(
        is.matrix(A),
        is.numeric(v)
    )
    if (!is.double(A)) {
        A <- matrix(as.double(A), nrow = nrow(A))
    }
    if (!is.vector(v) || !is.double(v)) {
        v <- as.double(v)
    }
    stopifnot(
        nrow(A) == length(v),
        op %in% c('+', '-', '*', '/')
    )

    .Call('R_rowSweep', PACKAGE = 'wip', A, v, op)
}

## row*, col* tests ------------------------------------------------------------
n <- 3000
M <- matrix(runif(n * 12), n, 12)
stopifnot(
    all.equal(rowSums(M^2), rowSums.c(M^2)),
    all.equal(colSums(M), colSums.c(M))
)
microbenchmark(
    rowSums = rowSums(M^2),
    rowSums.c = rowSums.c(M^2),
    rowSqSums.c = rowSquareSums.c(M)
)
microbenchmark(
    colSums = colSums(M),
    colSums.c = colSums.c(M)
)

sum <- rowSums(M)
stopifnot(all.equal(
    sweep(M, 1, sum, FUN = `/`),
    rowSweep.c(M, sum, '/') # Row-normalize
), all.equal(
    sweep(M, 1, sum, FUN = `/`),
    M / sum
))
microbenchmark(
    sweep = sweep(M, 1, sum, FUN = `/`),
    M / sum,
    rowSweep.c = rowSweep.c(M, sum, '/') # Row-normalize
)

# Create symmetric matrix with constant diagonal entries.
nrow <- 200
diag <- 1.0
Sym <- tcrossprod(runif(nrow))
diag(Sym) <- diag
# Get vectorized lower triangular part of `Sym` matrix.
SymVec <- Sym[lower.tri(Sym)]
stopifnot(all.equal(
    rowSums(Sym),
    rowSumsSymVec.c(SymVec, nrow, diag)
))
microbenchmark(
    rowSums = rowSums(Sym),
    rowSums.c = rowSums.c(Sym),
    rowSumsSymVec.c = rowSumsSymVec.c(SymVec, nrow, diag)
)

## Matrix-Matrix operation .call ----------------------------------------------
matrixprod.c <- function(A, B) {
    stopifnot(
        is.matrix(A), is.numeric(A),
        is.matrix(B), is.numeric(B),
        ncol(A) == nrow(B)
    )
    if (!is.double(A)) {
        A <- matrix(as.double(A), nrow = nrow(A))
    }
    if (!is.double(B)) {
        B <- matrix(as.double(B), nrow = nrow(B))
    }

    .Call('R_matrixprod', PACKAGE = 'wip', A, B)
}
crossprod.c <- function(A, B) {
    stopifnot(
        is.matrix(A), is.numeric(A),
        is.matrix(B), is.numeric(B),
        nrow(A) == nrow(B)
    )
    if (!is.double(A)) {
        A <- matrix(as.double(A), nrow = nrow(A))
    }
    if (!is.double(B)) {
        B <- matrix(as.double(B), nrow = nrow(B))
    }

    .Call('R_crossprod', PACKAGE = 'wip', A, B)
}
skewSymRank2k.c <- function(A, B, alpha = 1, beta = 0) {
    stopifnot(
        is.matrix(A), is.numeric(A),
        is.matrix(B), is.numeric(B),
        all(dim(A) == dim(B)),
        is.numeric(alpha), length(alpha) == 1L,
        is.numeric(beta), length(beta) == 1L
    )
    if (!is.double(A)) {
        A <- matrix(as.double(A), nrow = nrow(A))
    }
    if (!is.double(B)) {
        B <- matrix(as.double(B), nrow = nrow(B))
    }

    .Call('R_skewSymRank2k', PACKAGE = 'wip', A, B,
          as.double(alpha), as.double(beta))
}

## Matrix-Matrix operation tests ------------------------------------------------
n <- 200
k <- 100
m <- 300

A <- matrix(runif(n * k), n, k)
B <- matrix(runif(k * m), k, m)
stopifnot(
    all.equal(A %*% B, matrixprod.c(A, B))
)
microbenchmark(
    "%*%" = A %*% B,
    matrixprod.c = matrixprod.c(A, B)
)

A <- matrix(runif(k * n), k, n)
B <- matrix(runif(k * m), k, m)
stopifnot(
    all.equal(crossprod(A, B), crossprod.c(A, B))
)
microbenchmark(
    crossprod = crossprod(A, B),
    crossprod.c = crossprod.c(A, B)
)

n <- 12
k <- 11
A <- matrix(runif(n * k), n, k)
B <- matrix(runif(n * k), n, k)
stopifnot(all.equal(
    A %*% t(B) - B %*% t(A), skewSymRank2k.c(A, B)
))
microbenchmark(
    A %*% t(B) - B %*% t(A),
    skewSymRank2k.c(A, B)
)

## Orthogonal projection onto null space .Call --------------------------------
nullProj.c <- function(B) {
    stopifnot(
        is.matrix(B), is.numeric(B)
    )
    if (!is.double(B)) {
        B <- matrix(as.double(B), nrow = nrow(B))
    }

    .Call('R_nullProj', PACKAGE = 'wip', B)
}
## Orthogonal projection onto null space tests --------------------------------
p <- 12
q <- 10
V <- qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q)))

# Projection matrix onto the orthogonal complement of `span(V)`
Q <- diag(1, p) - tcrossprod(V, V)
stopifnot(
    all.equal(Q, nullProj.c(V))
)
microbenchmark(
    nullProj = diag(1, p) - tcrossprod(V, V),
    nullProj.c = nullProj.c(V)
)


# ## WIP for gradient. ----------------------------------------------------------

gradient.c <- function(X, X_diff, Y, V, h) {
    stopifnot(
        is.matrix(X), is.double(X),
        is.matrix(X_diff), is.double(X_diff),
        ncol(X_diff) == ncol(X), nrow(X_diff) == nrow(X) * (nrow(X) - 1) / 2,
        is.vector(Y) || (is.matrix(Y) && min(dim(Y)) == 1L), is.double(Y),
        length(Y) == nrow(X),
        is.matrix(V), is.double(V),
        nrow(V) == ncol(X),
        is.vector(h), is.numeric(h), length(h) == 1
    )

    .Call('R_gradient', PACKAGE = 'wip',
          X, X_diff, as.double(Y), V, as.double(h))
}

elem.pairs <- function(elements) {
    # Number of elements to match.
    n <- length(elements)
    # Create all combinations.
    pairs <- rbind(rep(elements, each = n), rep(elements, n))
    # Select unique combinations without self interaction.
    return(pairs[, pairs[1, ] < pairs[2, ]])
}
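# Illustration (added note, not part of the original source): for example,
# elem.pairs(1:4) yields the 2 x 6 matrix of index pairs
#     1 1 1 2 2 3
#     2 3 4 3 4 4
# i.e. row 1 holds the `i` and row 2 the `j` of all pairs with i < j.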
grad <- function(X, Y, V, h, persistent = TRUE) {
    n <- nrow(X)
    p <- ncol(X)

    if (!persistent) {
        pair.index <- elem.pairs(seq(n))
        i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
        j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
        lower <- ((i - 1) * n) + j
        upper <- ((j - 1) * n) + i
        X_diff <- X[i, , drop = F] - X[j, , drop = F]
    }

    # Projection matrix onto the orthogonal complement of `span(V)`
    Q <- diag(1, p) - tcrossprod(V, V)
    # Vectorized distance matrix `D`.
    vecD <- rowSums((X_diff %*% Q)^2)

    # Weight matrix `W` (dnorm ... Gaussian density function)
    W <- matrix(1, n, n)                        # `exp(0) == 1`
    W[lower] <- exp((-0.5 / h) * vecD^2)        # Set lower tri. part
    W[upper] <- t.default(W)[upper]             # Mirror lower tri. to upper
    W <- sweep(W, 2, colSums(W), FUN = `/`)     # Col-Normalize

    # Weighted `Y` moments
    y1 <- Y %*% W   # Result is 1D -> transposition irrelevant
    y2 <- Y^2 %*% W

    # Per example loss `L(V, X_i)`
    L <- y2 - y1^2

    # Vectorized Weights with forced symmetry
    vecS <- (L[i] - (Y[j] - y1[i])^2) * W[lower]
    vecS <- vecS + ((L[j] - (Y[i] - y1[j])^2) * W[upper])
    # Compute scaling of `X` row differences
    vecS <- vecS * vecD

    G <- crossprod(X_diff, X_diff * vecS) %*% V
    G <- (-2 / (n * h^2)) * G
    return(G)
}
rStiefl <- function(p, q) {
    return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))))
}

n <- 200
p <- 12
q <- 10

X <- matrix(runif(n * p), n, p)
Y <- runif(n)
V <- rStiefl(p, q)
h <- 0.1

pair.index <- elem.pairs(seq(n))
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
lower <- ((i - 1) * n) + j
upper <- ((j - 1) * n) + i
X_diff <- X[i, , drop = F] - X[j, , drop = F]

stopifnot(all.equal(
    grad(X, Y, V, h),
    gradient.c(X, X_diff, Y, V, h)
))
microbenchmark(
    grad = grad(X, Y, V, h),
    gradient.c = gradient.c(X, X_diff, Y, V, h)
)

@@ -0,0 +1,421 @@
#include <stdlib.h>
#include <string.h>
#include <math.h>

#include <R_ext/BLAS.h>
#include <R_ext/Lapack.h>
#include <R_ext/Error.h>
// #include <Rmath.h>

#include "wip.h"

static inline void rowSums(const double *A,
                           const int nrow, const int ncol,
                           double *sum) {
    int i, j, block_size, block_size_i;
    const double *A_block = A;
    const double *A_end = A + nrow * ncol;

    if (nrow > CVE_MEM_CHUNK_SIZE) {
        block_size = CVE_MEM_CHUNK_SIZE;
    } else {
        block_size = nrow;
    }

    // Iterate `(block_size_i, ncol)` submatrix blocks.
    for (i = 0; i < nrow; i += block_size_i) {
        // Reset `A` to new block beginning.
        A = A_block;
        // Take block size of everything left and reduce to max size.
        block_size_i = nrow - i;
        if (block_size_i > block_size) {
            block_size_i = block_size;
        }
        // Compute the block's first column,
        for (j = 0; j < block_size_i; ++j) {
            sum[j] = A[j];
        }
        // and sum the following columns onto it.
        for (A += nrow; A < A_end; A += nrow) {
            for (j = 0; j < block_size_i; ++j) {
                sum[j] += A[j];
            }
        }
        // Step one block forth.
        A_block += block_size_i;
        sum += block_size_i;
    }
}

static inline void colSums(const double *A,
                           const int nrow, const int ncol,
                           double *sum) {
    int j;
    double *sum_end = sum + ncol;

    memset(sum, 0, sizeof(double) * ncol);
    for (; sum < sum_end; ++sum) {
        for (j = 0; j < nrow; ++j) {
            *sum += A[j];
        }
        A += nrow;
    }
}

static inline void rowSquareSums(const double *A,
                                 const int nrow, const int ncol,
                                 double *sum) {
    int i, j, block_size, block_size_i;
    const double *A_block = A;
    const double *A_end = A + nrow * ncol;

    if (nrow < CVE_MEM_CHUNK_SIZE) {
        block_size = nrow;
    } else {
        block_size = CVE_MEM_CHUNK_SIZE;
    }

    // Iterate `(block_size_i, ncol)` submatrix blocks.
    for (i = 0; i < nrow; i += block_size_i) {
        // Reset `A` to new block beginning.
        A = A_block;
        // Take block size of everything left and reduce to max size.
        block_size_i = nrow - i;
        if (block_size_i > block_size) {
            block_size_i = block_size;
        }
        // Compute the block's first column,
        for (j = 0; j < block_size_i; ++j) {
            sum[j] = A[j] * A[j];
        }
        // and sum the following columns onto it.
        for (A += nrow; A < A_end; A += nrow) {
            for (j = 0; j < block_size_i; ++j) {
                sum[j] += A[j] * A[j];
            }
        }
        // Step one block forth.
        A_block += block_size_i;
        sum += block_size_i;
    }
}

static inline void rowSumsSymVec(const double *Avec, const int nrow,
                                 const double *diag,
                                 double *sum) {
    int i, j;

    if (*diag == 0.0) {
        memset(sum, 0, nrow * sizeof(double));
    } else {
        for (i = 0; i < nrow; ++i) {
            sum[i] = *diag;
        }
    }

    for (j = 0; j < nrow; ++j) {
        for (i = j + 1; i < nrow; ++i, ++Avec) {
            sum[j] += *Avec;
            sum[i] += *Avec;
        }
    }
}

/* C[, j] = A[, j] * v for each j = 1 to ncol */
static void rowSweep(const double *A, const int nrow, const int ncol,
                     const char* op,
                     const double *v, // vector of length nrow
                     double *C) {
    int i, j, block_size, block_size_i;
    const double *A_block = A;
    double *C_block = C;
    const double *A_end = A + nrow * ncol;

    if (nrow > CVE_MEM_CHUNK_SMALL) { // small because 3 vectors in cache
        block_size = CVE_MEM_CHUNK_SMALL;
    } else {
        block_size = nrow;
    }

    if (*op == '+') {
        // Iterate `(block_size_i, ncol)` submatrix blocks.
        for (i = 0; i < nrow; i += block_size_i) {
            // Set `A` and `C` to block beginning.
            A = A_block;
            C = C_block;
            // Get current block's row size.
            block_size_i = nrow - i;
            if (block_size_i > block_size) {
                block_size_i = block_size;
            }
            // Perform element wise operation for block.
            for (; A < A_end; A += nrow, C += nrow) {
                for (j = 0; j < block_size_i; ++j) {
                    C[j] = A[j] + v[j]; // FUN = '+'
                }
            }
            // Step one block forth.
            A_block += block_size_i;
            C_block += block_size_i;
            v += block_size_i;
        }
    } else if (*op == '-') {
        // Iterate `(block_size_i, ncol)` submatrix blocks.
        for (i = 0; i < nrow; i += block_size_i) {
            // Set `A` and `C` to block beginning.
            A = A_block;
            C = C_block;
            // Get current block's row size.
            block_size_i = nrow - i;
            if (block_size_i > block_size) {
                block_size_i = block_size;
            }
            // Perform element wise operation for block.
            for (; A < A_end; A += nrow, C += nrow) {
                for (j = 0; j < block_size_i; ++j) {
                    C[j] = A[j] - v[j]; // FUN = '-'
                }
            }
            // Step one block forth.
            A_block += block_size_i;
            C_block += block_size_i;
            v += block_size_i;
        }
    } else if (*op == '*') {
        // Iterate `(block_size_i, ncol)` submatrix blocks.
        for (i = 0; i < nrow; i += block_size_i) {
            // Set `A` and `C` to block beginning.
            A = A_block;
            C = C_block;
            // Get current block's row size.
            block_size_i = nrow - i;
            if (block_size_i > block_size) {
                block_size_i = block_size;
            }
            // Perform element wise operation for block.
            for (; A < A_end; A += nrow, C += nrow) {
                for (j = 0; j < block_size_i; ++j) {
                    C[j] = A[j] * v[j]; // FUN = '*'
                }
            }
            // Step one block forth.
            A_block += block_size_i;
            C_block += block_size_i;
            v += block_size_i;
        }
    } else if (*op == '/') {
        // Iterate `(block_size_i, ncol)` submatrix blocks.
        for (i = 0; i < nrow; i += block_size_i) {
            // Set `A` and `C` to block beginning.
            A = A_block;
            C = C_block;
            // Get current block's row size.
            block_size_i = nrow - i;
            if (block_size_i > block_size) {
                block_size_i = block_size;
            }
            // Perform element wise operation for block.
            for (; A < A_end; A += nrow, C += nrow) {
                for (j = 0; j < block_size_i; ++j) {
                    C[j] = A[j] / v[j]; // FUN = '/'
                }
            }
            // Step one block forth.
            A_block += block_size_i;
            C_block += block_size_i;
            v += block_size_i;
        }
    } else {
        error("Got unknown 'op' (operation) argument.");
    }
}

static inline void matrixprod(const double *A, const int nrowA, const int ncolA,
                              const double *B, const int nrowB, const int ncolB,
                              double *C) {
    const double one = 1.0;
    const double zero = 0.0;

    // DGEMM with parameterization:
    //      C <- A %*% B
    F77_NAME(dgemm)("N", "N", &nrowA, &ncolB, &ncolA,
                    &one, A, &nrowA, B, &nrowB,
                    &zero, C, &nrowA);
}

static inline void crossprod(const double *A, const int nrowA, const int ncolA,
                             const double *B, const int nrowB, const int ncolB,
                             double *C) {
    const double one = 1.0;
    const double zero = 0.0;

    // DGEMM with parameterization:
    //      C <- t(A) %*% B
    F77_NAME(dgemm)("T", "N", &ncolA, &ncolB, &nrowA,
                    &one, A, &nrowA, B, &nrowB,
                    &zero, C, &ncolA);
}

static inline void nullProj(const double *B, const int nrowB, const int ncolB,
                            double *Q) {
    const double minusOne = -1.0;
    const double one = 1.0;

    // Initialize as identity matrix.
    memset(Q, 0, sizeof(double) * nrowB * nrowB);
    double *Q_diag, *Q_end = Q + nrowB * nrowB;
    for (Q_diag = Q; Q_diag < Q_end; Q_diag += nrowB + 1) {
        *Q_diag = 1.0;
    }

    // DGEMM with parameterization:
    //      C <- (-1.0 * B %*% t(B)) + C
    F77_NAME(dgemm)("N", "T", &nrowB, &nrowB, &ncolB,
                    &minusOne, B, &nrowB, B, &nrowB,
                    &one, Q, &nrowB);
}

static inline void rangePairs(const int from, const int to, int *pairs) {
    int i, j;
    for (i = from; i < to; ++i) {
        for (j = i + 1; j < to; ++j) {
            pairs[0] = i;
            pairs[1] = j;
            pairs += 2;
        }
    }
}

// A dense skew-symmetric rank 2 update.
// Perform the update
//      C := alpha (A * B^T - B * A^T) + beta C
static void skewSymRank2k(const int nrow, const int ncol,
                          double alpha, const double *A, const double *B,
                          double beta,
                          double *C) {
    F77_NAME(dgemm)("N", "T",
                    &nrow, &nrow, &ncol,
                    &alpha, A, &nrow, B, &nrow,
                    &beta, C, &nrow);

    alpha *= -1.0;
    beta = 1.0;
    F77_NAME(dgemm)("N", "T",
                    &nrow, &nrow, &ncol,
                    &alpha, B, &nrow, A, &nrow,
                    &beta, C, &nrow);
}

// TODO: much potential for optimization!!!
static inline void weightedYandLoss(const int n,
                                    const double *Y,
                                    const double *vecD,
                                    const double *vecW,
                                    const double *colSums,
                                    double *y1, double *L, double *vecS,
                                    double *const loss) {
    int i, j, k, N = n * (n - 1) / 2;
    double l;

    for (i = 0; i < n; ++i) {
        y1[i] = Y[i] / colSums[i];
        L[i] = Y[i] * Y[i] / colSums[i];
    }

    for (k = j = 0; j < n; ++j) {
        for (i = j + 1; i < n; ++i, ++k) {
            y1[i] += Y[j] * vecW[k] / colSums[i];
            y1[j] += Y[i] * vecW[k] / colSums[j];
            L[i] += Y[j] * Y[j] * vecW[k] / colSums[i];
            L[j] += Y[i] * Y[i] * vecW[k] / colSums[j];
        }
    }

    l = 0.0;
    for (i = 0; i < n; ++i) {
        l += (L[i] -= y1[i] * y1[i]);
    }
    *loss = l / (double)n;

    for (k = j = 0; j < n; ++j) {
        for (i = j + 1; i < n; ++i, ++k) {
            l = Y[j] - y1[i];
            vecS[k] = (L[i] - (l * l)) / colSums[i];
            l = Y[i] - y1[j];
            vecS[k] += (L[j] - (l * l)) / colSums[j];
        }
    }

    for (k = 0; k < N; ++k) {
        vecS[k] *= vecW[k] * vecD[k];
    }
}

static inline double gaussKernel(const double x, const double scale) {
    return exp(scale * x * x);
}
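/* Added note (not in the original source): with `scale = -0.5 / h` and `x`
 * being a squared projected distance, gaussKernel(x, scale) evaluates
 * exp(-x^2 / (2 h)), which matches the weights `exp((-0.5 / h) * vecD^2)`
 * used by the reference R implementation above. */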

static void gradient(const int n, const int p, const int q,
                     const double *X,
                     const double *X_diff,
                     const double *Y,
                     const double *V,
                     const double h,
                     double *G, double *const loss) {
    // Number of non-trivial (X_i, X_j) pairs.
    int i, N = (n * (n - 1)) / 2;
    double scale = -0.5 / h;
    const double one = 1.0;

    if (X_diff == (void*)0) {
        // TODO: ...
    }

    // Allocate and compute projection matrix `Q = I_p - V * V^T`
    double *Q = (double*)malloc(p * p * sizeof(double));
    nullProj(V, p, q, Q);

    // Allocate and compute vectorized distance matrix with a temporary
    // projection of `X_diff`.
    double *vecD = (double*)malloc(N * sizeof(double));
    double *X_proj;
    if (p < 5) { // TODO: refine that!
        X_proj = (double*)malloc(N * 5 * sizeof(double));
    } else {
        X_proj = (double*)malloc(N * p * sizeof(double));
    }
    matrixprod(X_diff, N, p, Q, p, p, X_proj);
    rowSquareSums(X_proj, N, p, vecD);

    // Apply kernel to the distance vector for the weights computation.
    double *vecW = X_proj; // reuse memory area, no longer needed.
    for (i = 0; i < N; ++i) {
        vecW[i] = gaussKernel(vecD[i], scale);
    }
    double *colSums = X_proj + N; // still allocated!
    rowSumsSymVec(vecW, n, &one, colSums); // rowSums = colSums cause Sym

    // Compute weighted responses of the first and second moment, aka y1, y2.
    double *y1 = X_proj + N + n;
    double *L = X_proj + N + (2 * n);
    // Allocate X_diff scaling vector `vecS`, not in the `X_proj` mem area
    // because it is used simultaneously with X_proj in the final gradient
    // computation.
    double *vecS = (double*)malloc(N * sizeof(double));
    weightedYandLoss(n, Y, vecD, vecW, colSums, y1, L, vecS, loss);

    // Compute the gradient using X_proj for the intermediate scaled X_diff.
    rowSweep(X_diff, N, p, "*", vecS, X_proj);
    // Reuse Q which has the required dim (p, p).
    crossprod(X_diff, N, p, X_proj, N, p, Q);
    // Product with V
    matrixprod(Q, p, p, V, p, q, G);
    // And final scaling (TODO: move into matrixprod!)
    scale = -2.0 / (((double)n) * h * h);
    N = p * q;
    for (i = 0; i < N; ++i) {
        G[i] *= scale;
    }

    free(vecS);
    free(X_proj);
    free(vecD);
    free(Q);
}
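/* Added note (not in the original source): `X_proj` is over-allocated on
 * purpose so it can be recycled as scratch space once the projection is no
 * longer needed: the first N entries hold the kernel weights `vecW`, followed
 * by n column sums, n weighted first moments `y1`, and n per-sample losses
 * `L`. The `p < 5` branch guarantees at least N * 5 doubles, which covers
 * N + 3 * n for any n >= 3. */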

@@ -0,0 +1,159 @@
#ifndef _CVE_INCLUDE_GUARD_
#define _CVE_INCLUDE_GUARD_

#include <Rinternals.h>

#define CVE_MEM_CHUNK_SMALL 1016
#define CVE_MEM_CHUNK_SIZE 2032

static inline void rowSums(const double *A,
                           const int nrow, const int ncol,
                           double *sum);
SEXP R_rowSums(SEXP A) {
    SEXP sums = PROTECT(allocVector(REALSXP, nrows(A)));

    rowSums(REAL(A), nrows(A), ncols(A), REAL(sums));

    UNPROTECT(1);
    return sums;
}

static inline void colSums(const double *A,
                           const int nrow, const int ncol,
                           double *sum);
SEXP R_colSums(SEXP A) {
    SEXP sums = PROTECT(allocVector(REALSXP, ncols(A)));

    colSums(REAL(A), nrows(A), ncols(A), REAL(sums));

    UNPROTECT(1);
    return sums;
}

static inline void rowSquareSums(const double*, const int, const int, double*);
SEXP R_rowSquareSums(SEXP A) {
    SEXP result = PROTECT(allocVector(REALSXP, nrows(A)));

    rowSquareSums(REAL(A), nrows(A), ncols(A), REAL(result));

    UNPROTECT(1);
    return result;
}

static inline void rowSumsSymVec(const double *Avec, const int nrow,
                                 const double *diag,
                                 double *sum);
SEXP R_rowSumsSymVec(SEXP Avec, SEXP nrow, SEXP diag) {
    SEXP sum = PROTECT(allocVector(REALSXP, *INTEGER(nrow)));

    rowSumsSymVec(REAL(Avec), *INTEGER(nrow), REAL(diag), REAL(sum));

    UNPROTECT(1);
    return sum;
}

static void rowSweep(const double *A, const int nrow, const int ncol,
                     const char* op,
                     const double *v, // vector of length nrow
                     double *C);
SEXP R_rowSweep(SEXP A, SEXP v, SEXP op) {
    SEXP C = PROTECT(allocMatrix(REALSXP, nrows(A), ncols(A)));

    rowSweep(REAL(A), nrows(A), ncols(A),
             CHAR(STRING_ELT(op, 0)),
             REAL(v), REAL(C));

    UNPROTECT(1);
    return C;
}

static inline void matrixprod(const double *A, const int nrowA, const int ncolA,
                              const double *B, const int nrowB, const int ncolB,
                              double *C);
SEXP R_matrixprod(SEXP A, SEXP B) {
    SEXP C = PROTECT(allocMatrix(REALSXP, nrows(A), ncols(B)));

    matrixprod(REAL(A), nrows(A), ncols(A),
               REAL(B), nrows(B), ncols(B),
               REAL(C));

    UNPROTECT(1);
    return C;
}

static inline void crossprod(const double *A, const int nrowA, const int ncolA,
                             const double *B, const int nrowB, const int ncolB,
                             double *C);
SEXP R_crossprod(SEXP A, SEXP B) {
    SEXP C = PROTECT(allocMatrix(REALSXP, ncols(A), ncols(B)));

    crossprod(REAL(A), nrows(A), ncols(A),
              REAL(B), nrows(B), ncols(B),
              REAL(C));

    UNPROTECT(1);
    return C;
}

static void skewSymRank2k(const int n, const int k,
                          double alpha, const double *A, const double *B,
                          double beta,
                          double *C);
SEXP R_skewSymRank2k(SEXP A, SEXP B, SEXP alpha, SEXP beta) {
    SEXP C = PROTECT(allocMatrix(REALSXP, nrows(A), nrows(A)));
    memset(REAL(C), 0, nrows(A) * nrows(A) * sizeof(double));

    skewSymRank2k(nrows(A), ncols(A),
                  *REAL(alpha), REAL(A), REAL(B),
                  *REAL(beta), REAL(C));

    UNPROTECT(1);
    return C;
}

static inline void nullProj(const double* B, const int nrowB, const int ncolB,
                            double* Q);
SEXP R_nullProj(SEXP B) {
    SEXP Q = PROTECT(allocMatrix(REALSXP, nrows(B), nrows(B)));

    nullProj(REAL(B), nrows(B), ncols(B), REAL(Q));

    UNPROTECT(1);
    return Q;
}

static inline void rangePairs(const int from, const int to, int *pairs);
SEXP R_rangePairs(SEXP from, SEXP to) {
    int start = asInteger(from);
    int end = asInteger(to) + 1;
    int n = end - start;

    SEXP out = PROTECT(allocMatrix(INTSXP, 2, n * (n - 1) / 2));
    rangePairs(start, end, INTEGER(out));

    UNPROTECT(1);
    return out;
}

static void gradient(const int n, const int p, const int q,
                     const double *X,
                     const double *X_diff,
                     const double *Y,
                     const double *V,
                     const double h,
                     double *G, double *const loss);
SEXP R_gradient(SEXP X, SEXP X_diff, SEXP Y, SEXP V, SEXP h) {
    int N = (nrows(X) * (nrows(X) - 1)) / 2;

    SEXP G = PROTECT(allocMatrix(REALSXP, nrows(V), ncols(V)));
    SEXP loss = PROTECT(allocVector(REALSXP, 1));

    gradient(nrows(X), ncols(X), ncols(V),
             REAL(X), REAL(X_diff), REAL(Y), REAL(V), *REAL(h),
             REAL(G), REAL(loss));

    UNPROTECT(2);
    return G;
}

#endif /* _CVE_INCLUDE_GUARD_ */