parent 4d2651fe8a
commit 9e46a2d3d7

@ -0,0 +1,11 @@
Package: CVE
|
||||||
|
Type: Package
|
||||||
|
Title: Conditional Variance Estimator for Sufficient Dimension Reduction
|
||||||
|
Version: 0.1
|
||||||
|
Date: 2019-08-29
|
||||||
|
Author: Loki
|
||||||
|
Maintainer: Loki <loki@no.mail>
|
||||||
|
Description: Implementation of the Conditional Variance Estimation (CVE) method. This package version is written in R with a small C core for the gradient computation.
|
||||||
|
License: GPL-3
|
||||||
|
Encoding: UTF-8
|
||||||
|
RoxygenNote: 6.1.1
|
|
@ -0,0 +1,24 @@
|
||||||
|
# Generated by roxygen2: do not edit by hand
|
||||||
|
|
||||||
|
S3method(plot,cve)
|
||||||
|
S3method(summary,cve)
|
||||||
|
export(cve)
|
||||||
|
export(cve.call)
|
||||||
|
export(cve.grid.search)
|
||||||
|
export(cve_linesearch)
|
||||||
|
export(cve_sgd)
|
||||||
|
export(cve_simple)
|
||||||
|
export(dataset)
|
||||||
|
export(elem.pairs)
|
||||||
|
export(estimate.bandwidth)
|
||||||
|
export(grad)
|
||||||
|
export(null)
|
||||||
|
export(rStiefl)
|
||||||
|
import(stats)
|
||||||
|
importFrom(graphics,lines)
|
||||||
|
importFrom(graphics,plot)
|
||||||
|
importFrom(graphics,points)
|
||||||
|
importFrom(stats,model.frame)
|
||||||
|
importFrom(stats,rbinom)
|
||||||
|
importFrom(stats,rnorm)
|
||||||
|
useDynLib(CVE, .registration = TRUE)
|
|
@ -0,0 +1,223 @@
|
||||||
|
#' Conditional Variance Estimator (CVE)
|
||||||
|
#'
|
||||||
|
#' Conditional Variance Estimator for Sufficient Dimension
|
||||||
|
#' Reduction
|
||||||
|
#'
|
||||||
|
#' TODO: And some details
|
||||||
|
#'
|
||||||
|
#'
|
||||||
|
#' @references Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
|
||||||
|
#'
|
||||||
|
#' @docType package
|
||||||
|
#' @author Loki
|
||||||
|
#' @useDynLib CVE, .registration = TRUE
|
||||||
|
"_PACKAGE"
|
||||||
|
|
||||||
|
#' Implementation of the CVE method.
|
||||||
|
#'
|
||||||
|
#' Conditional Variance Estimator (CVE) is a novel sufficient dimension
|
||||||
|
#' reduction (SDR) method assuming a model
|
||||||
|
#' \deqn{Y \sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon}
|
||||||
|
#' where B'X is a lower dimensional projection of the predictors.
|
||||||
|
#'
|
||||||
|
#' @param formula Formula for the regression model defining `X`, `Y`.
|
||||||
|
#' See: \code{\link{formula}}.
|
||||||
|
#' @param data data.frame holding data for formula.
|
||||||
|
#' @param method The optimization method to use; the variants differ only in
#'  the optimization strategy. All of them are gradient-based optimizations on
#'  a Stiefel manifold.
#'  \itemize{
#'    \item "simple" Simple reduction of the step size.
#'    \item "sgd" Stochastic gradient descent.
#'    \item "linesearch" Curvilinear line search with Armijo-Wolfe conditions.
#'  }
|
||||||
|
#' @param ... Further parameters depending on the used method.
|
||||||
|
#' @examples
|
||||||
|
#' library(CVE)
|
||||||
|
#'
|
||||||
|
#' # sample dataset
|
||||||
|
#' ds <- dataset("M5")
|
||||||
|
#'
|
||||||
|
#' # call `cve` with default method (aka "simple")
|
||||||
|
#' dr.simple <- cve(ds$Y ~ ds$X, k = ncol(ds$B))
|
||||||
|
#' # plot optimization history (loss via iteration)
|
||||||
|
#' plot(dr.simple, main = "CVE M5 simple")
|
||||||
|
#'
|
||||||
|
#' # call `cve` with method "linesearch" using a `data.frame` as data.
#' data <- data.frame(Y = ds$Y, X = ds$X)
#' # Note: `Y`, `X` are NOT defined, they are extracted from `data`.
|
||||||
|
#' dr.linesearch <- cve(Y ~ ., data, method = "linesearch", k = ncol(ds$B))
|
||||||
|
#' plot(dr.linesearch, main = "CVE M5 linesearch")
|
||||||
|
#'
|
||||||
|
#' @references Fertl L., Bura E. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
|
||||||
|
#'
|
||||||
|
#' @seealso \code{\link{formula}}. For a complete parameters list (dependent on
|
||||||
|
#' the method) see \code{\link{cve_simple}}, \code{\link{cve_sgd}}
|
||||||
|
#' @import stats
|
||||||
|
#' @importFrom stats model.frame
|
||||||
|
#' @export
|
||||||
|
cve <- function(formula, data, method = "simple", max.dim = 10, ...) {
|
||||||
|
# check for type of `data` if supplied and set default
|
||||||
|
if (missing(data)) {
|
||||||
|
data <- environment(formula)
|
||||||
|
} else if (!is.data.frame(data)) {
|
||||||
|
stop('Parameter `data` must be a `data.frame` or missing.')
|
||||||
|
}
|
||||||
|
|
||||||
|
# extract `X`, `Y` from `formula` with `data`
|
||||||
|
model <- stats::model.frame(formula, data)
|
||||||
|
X <- as.matrix(model[,-1, drop = FALSE])
|
||||||
|
Y <- as.matrix(model[, 1, drop = FALSE])
|
||||||
|
|
||||||
|
# pass extracted data on to [cve.call()]
|
||||||
|
dr <- cve.call(X, Y, method = method, ...)
|
||||||
|
|
||||||
|
# overwrite `call` property from [cve.call()]
|
||||||
|
dr$call <- match.call()
|
||||||
|
return(dr)
|
||||||
|
}
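
# Illustrative sketch (not part of the package code): what the formula handling
# above does. For `Y ~ .` with a `data.frame`, `model.frame` places the response
# in the first column and the predictors in the remaining columns. The data used
# here is made up for illustration.
#
# data <- data.frame(Y = rnorm(5), X1 = rnorm(5), X2 = rnorm(5))
# model <- stats::model.frame(Y ~ ., data)
# Y <- as.matrix(model[, 1, drop = FALSE])   # 5 x 1 response
# X <- as.matrix(model[, -1, drop = FALSE])  # 5 x 2 predictor matrix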
|
||||||
|
|
||||||
|
#' @param X Data matrix.
#' @param Y Responses.
#' @param nObs Parameter for the bandwidth estimation, as described in the paper.
#' @param k Guess for the SDR dimension.
|
||||||
|
#' @param ... Method specific parameters.
|
||||||
|
#' @rdname cve
|
||||||
|
#' @export
|
||||||
|
cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5,
|
||||||
|
min.dim = 1, max.dim = 10, k, ...) {
|
||||||
|
|
||||||
|
# parameter checking
|
||||||
|
if (!is.matrix(X)) {
|
||||||
|
        stop('X should be a matrix.')
|
||||||
|
}
|
||||||
|
if (is.matrix(Y)) {
|
||||||
|
Y <- as.vector(Y)
|
||||||
|
}
|
||||||
|
if (nrow(X) != length(Y)) {
|
||||||
|
stop('Rows of X and number of Y elements are not compatible.')
|
||||||
|
}
|
||||||
|
if (ncol(X) < 2) {
|
||||||
|
stop('X is one dimensional, no need for dimension reduction.')
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!missing(k)) {
|
||||||
|
min.dim <- as.integer(k)
|
||||||
|
max.dim <- as.integer(k)
|
||||||
|
} else {
|
||||||
|
min.dim <- as.integer(min.dim)
|
||||||
|
max.dim <- as.integer(min(max.dim, ncol(X) - 1L))
|
||||||
|
}
|
||||||
|
if (min.dim > max.dim) {
|
||||||
|
        stop('`min.dim` must not be larger than `max.dim`.')
|
||||||
|
}
|
||||||
|
if (max.dim >= ncol(X)) {
|
||||||
|
stop('`max.dim` must be smaller than `ncol(X)`.')
|
||||||
|
}
|
||||||
|
|
||||||
|
# Call specified method.
|
||||||
|
method <- tolower(method)
|
||||||
|
call <- match.call()
|
||||||
|
dr <- list()
|
||||||
|
for (k in min.dim:max.dim) {
|
||||||
|
if (method == 'simple') {
|
||||||
|
dr.k <- cve_simple(X, Y, k, nObs = nObs, ...)
|
||||||
|
} else if (method == 'linesearch') {
|
||||||
|
dr.k <- cve_linesearch(X, Y, k, nObs = nObs, ...)
|
||||||
|
} else if (method == 'sgd') {
|
||||||
|
dr.k <- cve_sgd(X, Y, k, nObs = nObs, ...)
|
||||||
|
} else {
|
||||||
|
stop('Got unknown method.')
|
||||||
|
}
|
||||||
|
dr.k$k <- k
|
||||||
|
class(dr.k) <- "cve.k"
|
||||||
|
dr[[k]] <- dr.k
|
||||||
|
}
|
||||||
|
|
||||||
|
# augment result information
|
||||||
|
dr$method <- method
|
||||||
|
dr$call <- call
|
||||||
|
class(dr) <- "cve"
|
||||||
|
return(dr)
|
||||||
|
}
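
# Illustrative usage sketch (not part of the package code): run `cve.call` over
# a range of dimensions and pick the dimension with the smallest loss. The
# dimension range 1:4 is an arbitrary choice for this example.
#
# ds <- dataset("M1")
# dr <- cve.call(ds$X, ds$Y, method = "simple", min.dim = 1, max.dim = 4)
# losses <- sapply(1:4, function(k) dr[[k]]$loss)
# k.best <- which.min(losses)
# B.hat <- dr[[k.best]]$B   # estimated basis, see `null(V.best)` in the methods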
|
||||||
|
|
||||||
|
# TODO: write summary
|
||||||
|
# summary.cve <- function() {
|
||||||
|
# # code #
|
||||||
|
# }
|
||||||
|
|
||||||
|
#' Plotting helper for objects of class \code{cve}.
|
||||||
|
#'
|
||||||
|
#' @param x Object of class \code{cve} (result of [cve()]).
|
||||||
|
#' @param content Specifies what to plot:
|
||||||
|
#' \itemize{
|
||||||
|
#' \item "history" Plots the loss history from stiefel optimization
|
||||||
|
#' (default).
|
||||||
|
#' \item ... TODO: add (if there are any)
|
||||||
|
#' }
|
||||||
|
#' @param ... Pass through parameters to [plot()] and [lines()]
|
||||||
|
#'
|
||||||
|
#' @usage ## S3 method for class 'cve'
|
||||||
|
#' plot(x, content = "history", ...)
|
||||||
|
#' @seealso see \code{\link{par}} for graphical parameters to pass through
|
||||||
|
#' as well as \code{\link{plot}} for standard plot utility.
|
||||||
|
#' @importFrom graphics plot lines points
|
||||||
|
#' @method plot cve
|
||||||
|
#' @export
|
||||||
|
plot.cve <- function(x, ...) {
|
||||||
|
|
||||||
|
|
||||||
|
# H <- x$history
|
||||||
|
# H_1 <- H[!is.na(H[, 1]), 1]
|
||||||
|
|
||||||
|
# defaults <- list(
|
||||||
|
# main = "History",
|
||||||
|
# xlab = "Iterations i",
|
||||||
|
# ylab = expression(loss == L[n](V^{(i)})),
|
||||||
|
# xlim = c(1, nrow(H)),
|
||||||
|
# ylim = c(0, max(H)),
|
||||||
|
# type = "l"
|
||||||
|
# )
|
||||||
|
|
||||||
|
# call.plot <- match.call()
|
||||||
|
# keys <- names(defaults)
|
||||||
|
# keys <- keys[match(keys, names(call.plot)[-1], nomatch = 0) == 0]
|
||||||
|
|
||||||
|
# for (key in keys) {
|
||||||
|
# call.plot[[key]] <- defaults[[key]]
|
||||||
|
# }
|
||||||
|
|
||||||
|
# call.plot[[1L]] <- quote(plot)
|
||||||
|
# call.plot$x <- quote(1:length(H_1))
|
||||||
|
# call.plot$y <- quote(H_1)
|
||||||
|
|
||||||
|
# eval(call.plot)
|
||||||
|
|
||||||
|
# if (ncol(H) > 1) {
|
||||||
|
# for (i in 2:ncol(H)) {
|
||||||
|
# H_i <- H[H[, i] > 0, i]
|
||||||
|
# lines(1:length(H_i), H_i)
|
||||||
|
# }
|
||||||
|
# }
|
||||||
|
# x.ends <- apply(H, 2, function(h) { length(h[!is.na(h)]) })
|
||||||
|
# y.ends <- apply(H, 2, function(h) { tail(h[!is.na(h)], n=1) })
|
||||||
|
# points(x.ends, y.ends)
|
||||||
|
}
|
||||||
|
|
||||||
|
#' Prints a summary of a \code{cve} result.
|
||||||
|
#' @param object Instance of class 'cve' as returned by \code{cve}.
|
||||||
|
#' @method summary cve
|
||||||
|
#' @export
|
||||||
|
summary.cve <- function(object, ...) {
|
||||||
|
cat('Summary of CVE result - Method: "', object$method, '"\n',
|
||||||
|
'\n',
|
||||||
|
'Dataset size: ', nrow(object$X), '\n',
|
||||||
|
'Data Dimension: ', ncol(object$X), '\n',
|
||||||
|
'SDR Dimension: ', object$k, '\n',
|
||||||
|
'loss: ', object$loss, '\n',
|
||||||
|
'\n',
|
||||||
|
'Called via:\n',
|
||||||
|
' ',
|
||||||
|
sep='')
|
||||||
|
print(object$call)
|
||||||
|
}
|
|
@ -0,0 +1,169 @@
|
||||||
|
#' Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe
|
||||||
|
#' conditions.
|
||||||
|
#'
|
||||||
|
#' @keywords internal
|
||||||
|
#' @export
|
||||||
|
cve_linesearch <- function(X, Y, k,
|
||||||
|
nObs = sqrt(nrow(X)),
|
||||||
|
h = NULL,
|
||||||
|
tau = 1.0,
|
||||||
|
tol = 1e-3,
|
||||||
|
rho1 = 0.1,
|
||||||
|
rho2 = 0.9,
|
||||||
|
slack = 0,
|
||||||
|
epochs = 50L,
|
||||||
|
attempts = 10L,
|
||||||
|
max.linesearch.iter = 10L,
|
||||||
|
logger = NULL
|
||||||
|
) {
|
||||||
|
    # Set the environment of `grad` to this function's environment so that
    # `grad` can access and modify the local variables defined here.
|
||||||
|
environment(grad) <- environment()
|
||||||
|
|
||||||
|
# Get dimensions.
|
||||||
|
n <- nrow(X)
|
||||||
|
p <- ncol(X)
|
||||||
|
q <- p - k
|
||||||
|
|
||||||
|
# Save initial learning rate `tau`.
|
||||||
|
tau.init <- tau
|
||||||
|
    # Adapt tolerance for the break condition.
|
||||||
|
tol <- sqrt(2 * q) * tol
|
||||||
|
|
||||||
|
    # Estimate the bandwidth if not given.
    if (missing(h) || !is.numeric(h)) {
|
||||||
|
h <- estimate.bandwidth(X, k, nObs)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Compute persistent data.
|
||||||
|
    # Compute lookup indices for symmetry, lower/upper
    # triangular parts and vectorization.
|
||||||
|
pair.index <- elem.pairs(seq(n))
|
||||||
|
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
|
||||||
|
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
|
||||||
|
# Matrix of vectorized indices. (vec(index) -> seq)
|
||||||
|
index <- matrix(seq(n * n), n, n)
|
||||||
|
lower <- index[lower.tri(index)]
|
||||||
|
upper <- t(index)[lower]
|
||||||
|
|
||||||
|
    # Create all pairwise differences of rows of `X`.
|
||||||
|
X_diff <- X[i, , drop = F] - X[j, , drop = F]
|
||||||
|
# Identity matrix.
|
||||||
|
I_p <- diag(1, p)
|
||||||
|
|
||||||
|
    # Init tracking of the current best result (across multiple attempts).
|
||||||
|
V.best <- NULL
|
||||||
|
loss.best <- Inf
|
||||||
|
|
||||||
|
# Start loop for multiple attempts.
|
||||||
|
for (attempt in 1:attempts) {
|
||||||
|
|
||||||
|
# Sample a `(p, q)` dimensional matrix from the stiefel manifold as
|
||||||
|
# optimization start value.
|
||||||
|
V <- rStiefl(p, q)
|
||||||
|
|
||||||
|
# Initial loss and gradient.
|
||||||
|
loss <- Inf
|
||||||
|
G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
|
||||||
|
# Set last loss (aka, loss after applying the step).
|
||||||
|
loss.last <- loss
|
||||||
|
|
||||||
|
# Call logger with initial values before starting optimization.
|
||||||
|
if (is.function(logger)) {
|
||||||
|
epoch <- 0 # Set epoch count to 0 (only relevant for logging).
|
||||||
|
error <- NA
|
||||||
|
logger(environment())
|
||||||
|
}
|
||||||
|
|
||||||
|
## Start optimization loop.
|
||||||
|
for (epoch in 1:epochs) {
|
||||||
|
|
||||||
|
# Cayley transform matrix `A`
|
||||||
|
A <- (G %*% t(V)) - (V %*% t(G))
|
||||||
|
|
||||||
|
# Directional derivative of the loss at current position, given
|
||||||
|
# as `Tr(G^T \cdot A \cdot V)`.
|
||||||
|
loss.prime <- -0.5 * norm(A, type = 'F')^2
|
||||||
|
|
||||||
|
# Linesearch
|
||||||
|
tau.upper <- Inf
|
||||||
|
tau.lower <- 0
|
||||||
|
tau <- tau.init
|
||||||
|
for (iter in 1:max.linesearch.iter) {
|
||||||
|
# Apply learning rate `tau`.
|
||||||
|
A.tau <- (tau / 2) * A
|
||||||
|
                # Parallel transport (on the Stiefel manifold) in the direction of `G`.
|
||||||
|
inv <- solve(I_p + A.tau)
|
||||||
|
V.tau <- inv %*% ((I_p - A.tau) %*% V)
|
||||||
|
|
||||||
|
# Loss at position after a step.
|
||||||
|
loss <- Inf # aka loss.tau
|
||||||
|
G.tau <- grad(X, Y, V.tau, h, loss.out = TRUE, persistent = TRUE)
|
||||||
|
|
||||||
|
# Armijo condition.
|
||||||
|
if (loss > loss.last + (rho1 * tau * loss.prime)) {
|
||||||
|
tau.upper <- tau
|
||||||
|
tau <- (tau.lower + tau.upper) / 2
|
||||||
|
next()
|
||||||
|
}
|
||||||
|
|
||||||
|
V.prime.tau <- -0.5 * inv %*% A %*% (V + V.tau)
|
||||||
|
loss.prime.tau <- sum(G * V.prime.tau) # Tr(grad(tau)^T \cdot Y^'(tau))
|
||||||
|
|
||||||
|
# Wolfe condition.
|
||||||
|
if (loss.prime.tau < rho2 * loss.prime) {
|
||||||
|
tau.lower <- tau
|
||||||
|
if (tau.upper == Inf) {
|
||||||
|
tau <- 2 * tau.lower
|
||||||
|
} else {
|
||||||
|
tau <- (tau.lower + tau.upper) / 2
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
break()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Compute error.
|
||||||
|
error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
|
||||||
|
|
||||||
|
# Check break condition (epoch check to skip ignored gradient calc).
|
||||||
|
            # Note: the division by `sqrt(2 * q)` is included in `tol`.
            if (error < tol || epoch >= epochs) {
|
||||||
|
# take last step and stop optimization.
|
||||||
|
V <- V.tau
|
||||||
|
# Final call to the logger before stopping optimization
|
||||||
|
if (is.function(logger)) {
|
||||||
|
G <- G.tau
|
||||||
|
logger(environment())
|
||||||
|
}
|
||||||
|
break()
|
||||||
|
}
|
||||||
|
|
||||||
|
# Perform the step and remember previous loss.
|
||||||
|
V <- V.tau
|
||||||
|
loss.last <- loss
|
||||||
|
G <- G.tau
|
||||||
|
|
||||||
|
# Log after taking current step.
|
||||||
|
if (is.function(logger)) {
|
||||||
|
logger(environment())
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
# Check if current attempt improved previous ones
|
||||||
|
if (loss < loss.best) {
|
||||||
|
loss.best <- loss
|
||||||
|
V.best <- V
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return(list(
|
||||||
|
loss = loss.best,
|
||||||
|
V = V.best,
|
||||||
|
B = null(V.best),
|
||||||
|
h = h
|
||||||
|
))
|
||||||
|
}
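
# Illustrative sketch (not part of the package code): the curvilinear step used
# above, V(tau) = solve(I + (tau/2) A) %*% (I - (tau/2) A) %*% V with a
# skew-symmetric A, stays on the Stiefel manifold, i.e. t(V.tau) %*% V.tau
# remains the identity. Toy dimensions p = 5, q = 2 are made up for this check.
#
# p <- 5; q <- 2
# V <- rStiefl(p, q)                 # start value on the manifold
# W <- matrix(rnorm(p * q), p, q)    # stand-in for a gradient
# A <- (W %*% t(V)) - (V %*% t(W))   # skew-symmetric by construction
# tau <- 0.1
# A.tau <- (tau / 2) * A
# V.tau <- solve(diag(p) + A.tau) %*% ((diag(p) - A.tau) %*% V)
# round(t(V.tau) %*% V.tau, 10)      # ~ 2 x 2 identity matrix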
|
|
@ -0,0 +1,129 @@
|
||||||
|
#' Implementation of the CVE method using stochastic gradient descent (SGD)
#' with mini batches.
|
||||||
|
#'
|
||||||
|
#' @keywords internal
|
||||||
|
#' @export
|
||||||
|
cve_sgd <- function(X, Y, k,
|
||||||
|
nObs = sqrt(nrow(X)),
|
||||||
|
h = NULL,
|
||||||
|
tau = 0.01,
|
||||||
|
tol = 1e-3,
|
||||||
|
epochs = 50L,
|
||||||
|
batch.size = 16L,
|
||||||
|
attempts = 10L,
|
||||||
|
logger = NULL
|
||||||
|
) {
|
||||||
|
    # Set the environment of `grad` to this function's environment so that
    # `grad` can access and modify the local variables defined here.
|
||||||
|
environment(grad) <- environment()
|
||||||
|
|
||||||
|
# Get dimensions.
|
||||||
|
n <- nrow(X) # Number of samples.
|
||||||
|
p <- ncol(X) # Data dimensions
|
||||||
|
q <- p - k # Complement dimension of the SDR space.
|
||||||
|
|
||||||
|
# Save initial learning rate `tau`.
|
||||||
|
tau.init <- tau
|
||||||
|
    # Adapt tolerance for the break condition.
|
||||||
|
tol <- sqrt(2 * q) * tol
|
||||||
|
|
||||||
|
    # Estimate the bandwidth if not given.
|
||||||
|
if (missing(h) || !is.numeric(h)) {
|
||||||
|
h <- estimate.bandwidth(X, k, nObs)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Compute persistent data.
|
||||||
|
    # Compute lookup indices for symmetry, lower/upper
    # triangular parts and vectorization.
|
||||||
|
pair.index <- elem.pairs(seq(n))
|
||||||
|
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
|
||||||
|
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
|
||||||
|
# Index of vectorized matrix, for lower and upper triangular part.
|
||||||
|
lower <- ((i - 1) * n) + j
|
||||||
|
upper <- ((j - 1) * n) + i
|
||||||
|
|
||||||
|
    # Create all pairwise differences of rows of `X`.
|
||||||
|
X_diff <- X[i, , drop = F] - X[j, , drop = F]
|
||||||
|
# Identity matrix.
|
||||||
|
I_p <- diag(1, p)
|
||||||
|
# Init a list of data indices (shuffled for batching).
|
||||||
|
indices <- seq(n)
|
||||||
|
|
||||||
|
    # Init tracking of the current best result (across multiple attempts).
|
||||||
|
V.best <- NULL
|
||||||
|
loss.best <- Inf
|
||||||
|
|
||||||
|
# Start loop for multiple attempts.
|
||||||
|
for (attempt in 1:attempts) {
|
||||||
|
# Reset learning rate `tau`.
|
||||||
|
tau <- tau.init
|
||||||
|
|
||||||
|
# Sample a `(p, q)` dimensional matrix from the stiefel manifold as
|
||||||
|
# optimization start value.
|
||||||
|
V <- rStiefl(p, q)
|
||||||
|
# Keep track of last `V` for computing error after an epoch.
|
||||||
|
V.last <- V
|
||||||
|
|
||||||
|
if (is.function(logger)) {
|
||||||
|
loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
|
||||||
|
error <- NA
|
||||||
|
epoch <- 0
|
||||||
|
logger(environment())
|
||||||
|
}
|
||||||
|
|
||||||
|
# Repeat `epochs` times
|
||||||
|
for (epoch in 1:epochs) {
|
||||||
|
# Shuffle batches
|
||||||
|
batch.shuffle <- sample(indices)
|
||||||
|
|
||||||
|
# Make a step for each batch.
|
||||||
|
for (batch.start in seq(1, n, batch.size)) {
|
||||||
|
# Select batch data indices.
|
||||||
|
batch.end <- min(batch.start + batch.size - 1, length(batch.shuffle))
|
||||||
|
batch <- batch.shuffle[batch.start:batch.end]
|
||||||
|
|
||||||
|
# Compute batch gradient.
|
||||||
|
loss <- NULL
|
||||||
|
G <- grad(X[batch, ], Y[batch], V, h, loss.out = TRUE)
|
||||||
|
|
||||||
|
# Cayley transform matrix.
|
||||||
|
A <- (G %*% t(V)) - (V %*% t(G))
|
||||||
|
|
||||||
|
# Apply learning rate `tau`.
|
||||||
|
A.tau <- tau * A
|
||||||
|
                # Parallel transport (on the Stiefel manifold) in the direction of `G`.
|
||||||
|
V <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)
|
||||||
|
}
|
||||||
|
# And the error for the history.
|
||||||
|
error <- norm(V.last %*% t(V.last) - V %*% t(V), type = "F")
|
||||||
|
V.last <- V
|
||||||
|
|
||||||
|
if (is.function(logger)) {
|
||||||
|
# Compute loss at end of epoch for logging.
|
||||||
|
loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
|
||||||
|
logger(environment())
|
||||||
|
}
|
||||||
|
|
||||||
|
# Check break condition.
|
||||||
|
if (error < tol) {
|
||||||
|
break()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
# Compute actual loss after finishing for comparing multiple attempts.
|
||||||
|
loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
|
||||||
|
|
||||||
|
# After each attempt, check if last attempt reached a better result.
|
||||||
|
if (loss < loss.best) {
|
||||||
|
loss.best <- loss
|
||||||
|
V.best <- V
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return(list(
|
||||||
|
loss = loss.best,
|
||||||
|
V = V.best,
|
||||||
|
B = null(V.best),
|
||||||
|
h = h
|
||||||
|
))
|
||||||
|
}
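
# Illustrative sketch (not part of the package code): how the mini batches above
# are formed. With n = 10 samples and batch.size = 4 the shuffled indices are
# split into batches of sizes 4, 4 and 2.
#
# n <- 10; batch.size <- 4
# batch.shuffle <- sample(seq(n))
# for (batch.start in seq(1, n, batch.size)) {
#     batch.end <- min(batch.start + batch.size - 1, n)
#     print(batch.shuffle[batch.start:batch.end])
# }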
|
|
@ -0,0 +1,141 @@
|
||||||
|
#' Simple implementation of the CVE method. 'Simple' means that this method is
#' a classic GD method using no further tricks.
|
||||||
|
#'
|
||||||
|
#' @keywords internal
|
||||||
|
#' @export
|
||||||
|
cve_simple <- function(X, Y, k,
|
||||||
|
nObs = sqrt(nrow(X)),
|
||||||
|
h = NULL,
|
||||||
|
tau = 1.0,
|
||||||
|
tol = 1e-3,
|
||||||
|
slack = 0,
|
||||||
|
epochs = 50L,
|
||||||
|
attempts = 10L,
|
||||||
|
logger = NULL
|
||||||
|
) {
|
||||||
|
    # Set the environment of `grad` to this function's environment so that
    # `grad` can access and modify the local variables defined here.
|
||||||
|
environment(grad) <- environment()
|
||||||
|
|
||||||
|
# Get dimensions.
|
||||||
|
n <- nrow(X) # Number of samples.
|
||||||
|
p <- ncol(X) # Data dimensions
|
||||||
|
q <- p - k # Complement dimension of the SDR space.
|
||||||
|
|
||||||
|
# Save initial learning rate `tau`.
|
||||||
|
tau.init <- tau
|
||||||
|
    # Adapt tolerance for the break condition.
|
||||||
|
tol <- sqrt(2 * q) * tol
|
||||||
|
|
||||||
|
    # Estimate the bandwidth if not given.
|
||||||
|
if (missing(h) || !is.numeric(h)) {
|
||||||
|
h <- estimate.bandwidth(X, k, nObs)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Compute persistent data.
|
||||||
|
    # Compute lookup indices for symmetry, lower/upper
    # triangular parts and vectorization.
|
||||||
|
pair.index <- elem.pairs(seq(n))
|
||||||
|
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
|
||||||
|
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
|
||||||
|
# Index of vectorized matrix, for lower and upper triangular part.
|
||||||
|
lower <- ((i - 1) * n) + j
|
||||||
|
upper <- ((j - 1) * n) + i
|
||||||
|
|
||||||
|
    # Create all pairwise differences of rows of `X`.
|
||||||
|
X_diff <- X[i, , drop = F] - X[j, , drop = F]
|
||||||
|
# Identity matrix.
|
||||||
|
I_p <- diag(1, p)
|
||||||
|
|
||||||
|
    # Init tracking of the current best result (across multiple attempts).
|
||||||
|
V.best <- NULL
|
||||||
|
loss.best <- Inf
|
||||||
|
|
||||||
|
# Start loop for multiple attempts.
|
||||||
|
for (attempt in 1:attempts) {
|
||||||
|
# Reset learning rate `tau`.
|
||||||
|
tau <- tau.init
|
||||||
|
|
||||||
|
# Sample a `(p, q)` dimensional matrix from the stiefel manifold as
|
||||||
|
# optimization start value.
|
||||||
|
V <- rStiefl(p, q)
|
||||||
|
|
||||||
|
# Initial loss and gradient.
|
||||||
|
loss <- Inf
|
||||||
|
G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
|
||||||
|
# Set last loss (aka, loss after applying the step).
|
||||||
|
loss.last <- loss
|
||||||
|
|
||||||
|
# Cayley transform matrix `A`
|
||||||
|
A <- (G %*% t(V)) - (V %*% t(G))
|
||||||
|
|
||||||
|
# Call logger with initial values before starting optimization.
|
||||||
|
if (is.function(logger)) {
|
||||||
|
epoch <- 0 # Set epoch count to 0 (only relevant for logging).
|
||||||
|
error <- NA
|
||||||
|
logger(environment())
|
||||||
|
}
|
||||||
|
|
||||||
|
## Start optimization loop.
|
||||||
|
for (epoch in 1:epochs) {
|
||||||
|
# Apply learning rate `tau`.
|
||||||
|
A.tau <- tau * A
|
||||||
|
            # Parallel transport (on the Stiefel manifold) in the direction of `G`.
|
||||||
|
V.tau <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)
|
||||||
|
|
||||||
|
# Loss at position after a step.
|
||||||
|
loss <- grad(X, Y, V.tau, h, loss.only = TRUE, persistent = TRUE)
|
||||||
|
|
||||||
|
            # Check if the step is appropriate; if not, reduce the learning rate.
|
||||||
|
if ((loss - loss.last) > slack * loss.last) {
|
||||||
|
tau <- tau / 2
|
||||||
|
next() # Keep position and try with smaller `tau`.
|
||||||
|
}
|
||||||
|
|
||||||
|
# Compute error.
|
||||||
|
error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
|
||||||
|
|
||||||
|
# Check break condition (epoch check to skip ignored gradient calc).
|
||||||
|
            # Note: the division by `sqrt(2 * q)` is included in `tol`.
            if (error < tol || epoch >= epochs) {
|
||||||
|
# take last step and stop optimization.
|
||||||
|
V <- V.tau
|
||||||
|
                # Call the logger a last time before stopping.
|
||||||
|
if (is.function(logger)) {
|
||||||
|
logger(environment())
|
||||||
|
}
|
||||||
|
break()
|
||||||
|
}
|
||||||
|
|
||||||
|
# Perform the step and remember previous loss.
|
||||||
|
V <- V.tau
|
||||||
|
loss.last <- loss
|
||||||
|
|
||||||
|
# Call logger after taking a step.
|
||||||
|
if (is.function(logger)) {
|
||||||
|
logger(environment())
|
||||||
|
}
|
||||||
|
|
||||||
|
# Compute gradient at new position.
|
||||||
|
G <- grad(X, Y, V, h, persistent = TRUE)
|
||||||
|
|
||||||
|
# Cayley transform matrix `A`
|
||||||
|
A <- (G %*% t(V)) - (V %*% t(G))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Check if current attempt improved previous ones
|
||||||
|
if (loss < loss.best) {
|
||||||
|
loss.best <- loss
|
||||||
|
V.best <- V
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return(list(
|
||||||
|
loss = loss.best,
|
||||||
|
V = V.best,
|
||||||
|
B = null(V.best),
|
||||||
|
h = h
|
||||||
|
))
|
||||||
|
}
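
# Illustrative sketch (not part of the package code): the step size control
# above halves `tau` whenever the new loss is not acceptable relative to the
# previous one (with the default `slack = 0` any increase is rejected).
#
# loss.last <- 1.0; slack <- 0
# (1.2 - loss.last) > slack * loss.last   # TRUE:  reject step, tau <- tau / 2
# (0.9 - loss.last) > slack * loss.last   # FALSE: accept step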
|
|
@ -0,0 +1,109 @@
|
||||||
|
#' Generates test datasets.
|
||||||
|
#'
|
||||||
|
#' Provides sample datasets. There are 5 different datasets named
#' M1, M2, M3, M4 and M5, described in the paper referenced below.
|
||||||
|
#' The general model is given by:
|
||||||
|
#' \deqn{Y \sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon}
|
||||||
|
#'
|
||||||
|
#' @param name One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"} or \code{"M5"}
|
||||||
|
#' @param n Number of samples.
|
||||||
|
#' @param p Dim. of random variable \code{X}.
|
||||||
|
#' @param p.mix Only for \code{"M4"}, see: below.
|
||||||
|
#' @param lambda Only for \code{"M4"}, see: below.
|
||||||
|
#'
|
||||||
|
#' @return List with elements
|
||||||
|
#' \itemize{
|
||||||
|
#' \item{X}{data}
|
||||||
|
#' \item{Y}{response}
|
||||||
|
#' \item{B}{Used dim-reduction matrix}
|
||||||
|
#' \item{name}{Name of the dataset (name parameter)}
|
||||||
|
#' }
|
||||||
|
#'
|
||||||
|
#' @section M1:
|
||||||
|
#' The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace
|
||||||
|
#' dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points.
|
||||||
|
#' The link function \eqn{g} is given as
|
||||||
|
#' \deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + 0.5\epsilon}{g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + 0.5 epsilon}
|
||||||
|
#' @section M2:
|
||||||
|
#' \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a default of \eqn{n = 200} data points.
|
||||||
|
#' The link function \eqn{g} is given as
|
||||||
|
#' \deqn{g(x) = x_1 x_2^2 + 0.5\epsilon}{g(x) = x_1 x_2^2 + 0.5 epsilon}
|
||||||
|
#' @section M3:
|
||||||
|
#' TODO:
|
||||||
|
#' @section M4:
|
||||||
|
#' TODO:
|
||||||
|
#' @section M5:
|
||||||
|
#' TODO:
|
||||||
|
#'
|
||||||
|
#' @import stats
|
||||||
|
#' @importFrom stats rnorm rbinom
|
||||||
|
#' @export
|
||||||
|
dataset <- function(name = "M1", n, B, p.mix = 0.3, lambda = 1.0) {
|
||||||
|
# validate parameters
|
||||||
|
stopifnot(name %in% c("M1", "M2", "M3", "M4", "M5"))
|
||||||
|
|
||||||
|
# set default values if not supplied
|
||||||
|
if (missing(n)) {
|
||||||
|
n <- if (name %in% c("M1", "M2")) 200 else if (name != "M5") 100 else 42
|
||||||
|
}
|
||||||
|
if (missing(B)) {
|
||||||
|
p <- 12
|
||||||
|
if (name == "M1") {
|
||||||
|
B <- cbind(
|
||||||
|
c( 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0),
|
||||||
|
c( 1,-1, 1,-1, 1,-1, 0, 0, 0, 0, 0, 0)
|
||||||
|
) / sqrt(6)
|
||||||
|
} else if (name == "M2") {
|
||||||
|
B <- cbind(
|
||||||
|
c(c(1, 0), rep(0, 10)),
|
||||||
|
c(c(0, 1), rep(0, 10))
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, 6)), 12, 1)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
p <- dim(B)[1]
|
||||||
|
        # validate col. nr to match dataset `k = dim(B)[2]`
        if (name %in% c("M1", "M2")) {
            stopifnot(dim(B)[2] == 2)
        } else { # name %in% c("M3", "M4", "M5")
            stopifnot(dim(B)[2] == 1)
        }
|
||||||
|
}
|
||||||
|
|
||||||
|
# set link function `g` for model `Y ~ g(B'X) + epsilon`
|
||||||
|
if (name == "M1") {
|
||||||
|
g <- function(BX) { BX[1] / (0.5 + (BX[2] + 1.5)^2) }
|
||||||
|
} else if (name == "M2") {
|
||||||
|
g <- function(BX) { BX[1] * BX[2]^2 }
|
||||||
|
} else if (name %in% c("M3", "M4")) {
|
||||||
|
g <- function(BX) { cos(BX[1]) }
|
||||||
|
} else { # name == "M5"
|
||||||
|
g <- function(BX) { 2 * log(abs(BX[1]) + 1) }
|
||||||
|
}
|
||||||
|
|
||||||
|
# compute X
|
||||||
|
if (name != "M4") {
|
||||||
|
# compute root of the covariance matrix according the dataset
|
||||||
|
if (name %in% c("M1", "M3")) {
|
||||||
|
# Variance-Covariance structure for `X ~ N_p(0, \Sigma)` with
|
||||||
|
# `\Sigma_{i, j} = 0.5^{|i - j|}`.
|
||||||
|
Sigma <- matrix(0.5^abs(kronecker(1:p, 1:p, '-')), p, p)
|
||||||
|
# decompose Sigma to Sigma.root^T Sigma.root = Sigma for usage in creation of `X`
|
||||||
|
Sigma.root <- chol(Sigma)
|
||||||
|
} else { # name %in% c("M2", "M5")
|
||||||
|
Sigma.root <- diag(rep(1, p)) # d-dim identity
|
||||||
|
}
|
||||||
|
# data `X` as multivariate random normal variable with
|
||||||
|
# variance matrix `Sigma`.
|
||||||
|
X <- replicate(p, rnorm(n, 0, 1)) %*% Sigma.root
|
||||||
|
} else { # name == "M4"
|
||||||
|
        X <- t(replicate(n, rep((1 - 2 * rbinom(1, 1, p.mix)) * lambda, p) + rnorm(p, 0, 1)))
|
||||||
|
}
|
||||||
|
|
||||||
|
    # response `Y ~ g(B'X) + epsilon` with `epsilon ~ N(0, 0.5^2)`
|
||||||
|
Y <- apply(X, 1, function(X_i) {
|
||||||
|
g(t(B) %*% X_i) + rnorm(1, 0, 0.5)
|
||||||
|
})
|
||||||
|
|
||||||
|
return(list(X = X, Y = Y, B = B, name = name))
|
||||||
|
}
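
# Illustrative usage sketch (not part of the package code): defaults for "M1".
#
# ds <- dataset("M1")
# dim(ds$X)     # 200 x 12
# length(ds$Y)  # 200
# ncol(ds$B)    # 2, the true SDR dimension of M1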
|
|
@ -0,0 +1,27 @@
|
||||||
|
#' Estimated bandwidth for CVE.
|
||||||
|
#'
|
||||||
|
#' Estimates a proper bandwidth \code{h} according to
|
||||||
|
#' \deqn{%
|
||||||
|
#' h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{%
|
||||||
|
#' h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p}
|
||||||
|
#'
|
||||||
|
#' @param X data matrix of dimension (n x p) with n data points X_i of
#' dimension p, i.e. each row represents a data point of dimension p.
|
||||||
|
#' @param k Guess for rank(B).
|
||||||
|
#' @param nObs Either a numeric or a function. If specified as a numeric value
#' it is used in the computation of the bandwidth directly. If it is a function,
#' `nObs` is evaluated as \code{nObs(nrow(x))}. The default behaviour if not
#' supplied at all is to use \code{nObs <- nrow(x)^0.5}.
|
||||||
|
#'
|
||||||
|
#' @seealso [\code{\link{qchisq}}]
|
||||||
|
#' @export
|
||||||
|
estimate.bandwidth <- function(X, k, nObs) {
|
||||||
|
n <- nrow(X)
|
||||||
|
p <- ncol(X)
|
||||||
|
|
||||||
|
X_centered <- scale(X, center=TRUE, scale=FALSE)
|
||||||
|
Sigma <- (1 / n) * t(X_centered) %*% X_centered
|
||||||
|
|
||||||
|
quantil <- qchisq((nObs - 1) / (n - 1), k)
|
||||||
|
return(2 * quantil * sum(diag(Sigma)) / p)
|
||||||
|
}
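
# Illustrative sketch (not part of the package code): the documented bandwidth
# formula written out once by hand for a made-up data matrix; it mirrors the
# computation above.
#
# set.seed(1)
# X <- matrix(rnorm(50 * 4), 50, 4)
# n <- nrow(X); p <- ncol(X); k <- 2; nObs <- sqrt(n)
# Sigma <- crossprod(scale(X, center = TRUE, scale = FALSE)) / n
# 2 * qchisq((nObs - 1) / (n - 1), k) * sum(diag(Sigma)) / p
# estimate.bandwidth(X, k, nObs)   # same value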
|
|
@ -0,0 +1,48 @@
|
||||||
|
#' Compute the gradient of `L(V)` given a dataset `X`.
|
||||||
|
#'
|
||||||
|
#' @param X Data matrix.
|
||||||
|
#' @param Y Response.
#' @param V Position to compute the gradient at, i.e. a point on the Stiefel manifold.
|
||||||
|
#' @param h Bandwidth
|
||||||
|
#' @param loss.out If \code{TRUE} the loss is written to the parent environment.
#' @param loss.only Boolean; if \code{TRUE} only the loss is computed and
#' returned as a single value.
|
||||||
|
#' @param persistent Determines if data indices and dependent calculations shall
|
||||||
|
#' be reused from the parent environment. ATTENTION: Do NOT set this flag, only
|
||||||
|
#' intended for internal usage by carefully aligned functions!
|
||||||
|
#' @keywords internal
|
||||||
|
#' @export
|
||||||
|
grad <- function(X, Y, V, h,
|
||||||
|
loss.out = FALSE,
|
||||||
|
loss.only = FALSE,
|
||||||
|
persistent = FALSE) {
|
||||||
|
# Get number of samples and dimension.
|
||||||
|
n <- nrow(X)
|
||||||
|
p <- ncol(X)
|
||||||
|
|
||||||
|
if (!persistent) {
|
||||||
|
        # Compute lookup indices for symmetry, lower/upper
        # triangular parts and vectorization.
|
||||||
|
pair.index <- elem.pairs(seq(n))
|
||||||
|
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
|
||||||
|
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
|
||||||
|
# Index of vectorized matrix, for lower and upper triangular part.
|
||||||
|
lower <- ((i - 1) * n) + j
|
||||||
|
upper <- ((j - 1) * n) + i
|
||||||
|
|
||||||
|
        # Create all pairwise differences of rows of `X`.
|
||||||
|
X_diff <- X[i, , drop = F] - X[j, , drop = F]
|
||||||
|
}
|
||||||
|
|
||||||
|
out <- .Call("grad_c", PACKAGE = "CVE",
|
||||||
|
X, X_diff, as.double(Y), V, as.double(h));
|
||||||
|
|
||||||
|
if (loss.only) {
|
||||||
|
return(out$loss)
|
||||||
|
}
|
||||||
|
if (loss.out) {
|
||||||
|
loss <<- out$loss
|
||||||
|
}
|
||||||
|
|
||||||
|
return(out$G)
|
||||||
|
}
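
# Illustrative usage sketch (not part of the package code), assuming the
# compiled `grad_c` routine is available (i.e. the package is installed):
#
# ds <- dataset("M1")
# k <- ncol(ds$B)
# V <- rStiefl(ncol(ds$X), ncol(ds$X) - k)
# h <- estimate.bandwidth(ds$X, k, sqrt(nrow(ds$X)))
# G <- grad(ds$X, ds$Y, V, h)                    # gradient, same shape as V
# l <- grad(ds$X, ds$Y, V, h, loss.only = TRUE)  # loss only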
|
|
@ -0,0 +1,43 @@
|
||||||
|
|
||||||
|
#' Performs a grid search for parameters over a parameter grid.
|
||||||
|
#' @examples
#' ds <- dataset("M1")
#' args <- list(
#'   h = c(0.05, 0.1, 0.2),
#'   method = c("simple", "sgd"),
#'   tau = c(0.5, 0.1, 0.01)
#' )
#' cve.grid.search(ds$X, ds$Y, ncol(ds$B), args)
|
||||||
|
#' @export
|
||||||
|
cve.grid.search <- function(X, Y, k, args) {
|
||||||
|
|
||||||
|
args$stringsAsFactors = FALSE
|
||||||
|
args$KEEP.OUT.ATTRS = FALSE
|
||||||
|
grid <- do.call(expand.grid, args)
|
||||||
|
grid.length <- length(grid[[1]])
|
||||||
|
|
||||||
|
print(grid)
|
||||||
|
|
||||||
|
for (i in 1:grid.length) {
|
||||||
|
arguments <- as.list(grid[i, ])
|
||||||
|
# Set required arguments
|
||||||
|
arguments$X <- X
|
||||||
|
arguments$Y <- Y
|
||||||
|
arguments$k <- k
|
||||||
|
# print(arguments)
|
||||||
|
dr <- do.call(cve.call, arguments)
|
||||||
|
print(dr$loss)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# ds <- dataset()
|
||||||
|
# X <- ds$X
|
||||||
|
# Y <- ds$Y
|
||||||
|
# (k <- ncol(ds$B))
|
||||||
|
# args <- list(
|
||||||
|
# h = c(0.05, 0.1, 0.2),
|
||||||
|
# method = c("simple", "sgd"),
|
||||||
|
# tau = c(0.5, 0.1, 0.01),
|
||||||
|
# attempts = c(1L)
|
||||||
|
# )
|
||||||
|
|
||||||
|
# cve.grid.search(X, Y, k, args)
|
|
@ -0,0 +1,40 @@
|
||||||
|
#' Samples uniformly from the Stiefel manifold.
|
||||||
|
#'
|
||||||
|
#' @param p row dim.
|
||||||
|
#' @param q col dim.
|
||||||
|
#' @return `(p, q)` semi-orthogonal matrix
|
||||||
|
#' @examples
|
||||||
|
#' V <- rStiefl(6, 4)
|
||||||
|
#' @export
|
||||||
|
rStiefl <- function(p, q) {
|
||||||
|
return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))))
|
||||||
|
}
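
# Illustrative check (not part of the package code): a draw is semi-orthogonal.
#
# V <- rStiefl(6, 4)
# round(t(V) %*% V, 10)   # 4 x 4 identity matrix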
|
||||||
|
|
||||||
|
#' Null space basis of given matrix `V`
|
||||||
|
#'
|
||||||
|
#' @param V `(p, q)` matrix
|
||||||
|
#' @return Semi-orthogonal `(p, p - q)` matrix spanning the null space of `V`.
|
||||||
|
#' @keywords internal
|
||||||
|
#' @export
|
||||||
|
null <- function(V) {
|
||||||
|
tmp <- qr(V)
|
||||||
|
set <- if(tmp$rank == 0L) seq_len(ncol(V)) else -seq_len(tmp$rank)
|
||||||
|
return(qr.Q(tmp, complete=TRUE)[, set, drop=FALSE])
|
||||||
|
}
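
# Illustrative check (not part of the package code): the columns of `null(V)`
# are orthogonal to the columns of `V`.
#
# V <- rStiefl(6, 4)
# B <- null(V)
# round(t(V) %*% B, 10)   # 4 x 2 zero matrix
# dim(B)                  # 6 x 2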
|
||||||
|
|
||||||
|
#' Creates a (numeric) matrix where each column contains
|
||||||
|
#' an element to element matching.
|
||||||
|
#' @param elements numeric vector of elements to match
|
||||||
|
#' @return Matrix of size `(2, n * (n - 1) / 2)` for an argument of length `n`.
|
||||||
|
#' @keywords internal
|
||||||
|
#' @examples
|
||||||
|
#' elem.pairs(seq.int(2, 5))
|
||||||
|
#' @export
|
||||||
|
elem.pairs <- function(elements) {
|
||||||
|
# Number of elements to match.
|
||||||
|
n <- length(elements)
|
||||||
|
# Create all combinations.
|
||||||
|
pairs <- rbind(rep(elements, each=n), rep(elements, n))
|
||||||
|
# Select unique combinations without self interaction.
|
||||||
|
return(pairs[, pairs[1, ] < pairs[2, ]])
|
||||||
|
}
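
# Illustrative check (not part of the package code):
#
# elem.pairs(seq(4))
# #      [,1] [,2] [,3] [,4] [,5] [,6]
# # [1,]    1    1    1    2    2    3
# # [2,]    2    3    4    3    4    4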
|
Binary file not shown.
|
@ -0,0 +1,20 @@
|
||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/CVE.R
|
||||||
|
\docType{package}
|
||||||
|
\name{CVE-package}
|
||||||
|
\alias{CVE}
|
||||||
|
\alias{CVE-package}
|
||||||
|
\title{Conditional Variance Estimator (CVE)}
|
||||||
|
\description{
|
||||||
|
Conditional Variance Estimator for Sufficient Dimension
|
||||||
|
Reduction
|
||||||
|
}
|
||||||
|
\details{
|
||||||
|
TODO: And some details
|
||||||
|
}
|
||||||
|
\references{
|
||||||
|
Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
|
||||||
|
}
|
||||||
|
\author{
|
||||||
|
Loki
|
||||||
|
}
|
|
@ -0,0 +1,71 @@
|
||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/CVE.R
|
||||||
|
\name{cve}
|
||||||
|
\alias{cve}
|
||||||
|
\alias{cve.call}
|
||||||
|
\title{Implementation of the CVE method.}
|
||||||
|
\usage{
|
||||||
|
cve(formula, data, method = "simple", max.dim = 10, ...)
|
||||||
|
|
||||||
|
cve.call(X, Y, method = "simple", nObs = nrow(X)^0.5, min.dim = 1,
|
||||||
|
max.dim = 10, k, ...)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{formula}{Formula for the regression model defining `X`, `Y`.
|
||||||
|
See: \code{\link{formula}}.}
|
||||||
|
|
||||||
|
\item{data}{data.frame holding data for formula.}
|
||||||
|
|
||||||
|
\item{method}{The optimization method to use; the variants differ only in the
optimization strategy. All of them are gradient-based optimizations on a
Stiefel manifold.
|
||||||
|
\itemize{
|
||||||
|
  \item "simple" Simple reduction of the step size.
  \item "sgd" Stochastic gradient descent.
  \item "linesearch" Curvilinear line search with Armijo-Wolfe conditions.
|
||||||
|
}}
|
||||||
|
|
||||||
|
\item{...}{Further parameters depending on the used method.}
|
||||||
|
|
||||||
|
\item{X}{Data}
|
||||||
|
|
||||||
|
\item{Y}{Responses}
|
||||||
|
|
||||||
|
\item{nObs}{Parameter for the bandwidth estimation, as described in the paper.}

\item{k}{Guess for the SDR dimension.}
|
||||||
|
|
||||||
|
\item{...}{Method specific parameters.}
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Conditional Variance Estimator (CVE) is a novel sufficient dimension
|
||||||
|
reduction (SDR) method assuming a model
|
||||||
|
\deqn{Y \sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon}
|
||||||
|
where B'X is a lower dimensional projection of the predictors.
|
||||||
|
}
|
||||||
|
\examples{
|
||||||
|
library(CVE)
|
||||||
|
|
||||||
|
# sample dataset
|
||||||
|
ds <- dataset("M5")
|
||||||
|
|
||||||
|
# call `cve` with default method (aka "simple")
|
||||||
|
dr.simple <- cve(ds$Y ~ ds$X, k = ncol(ds$B))
|
||||||
|
# plot optimization history (loss via iteration)
|
||||||
|
plot(dr.simple, main = "CVE M5 simple")
|
||||||
|
|
||||||
|
# call `cve` with method "linesearch" using a `data.frame` as data.
data <- data.frame(Y = ds$Y, X = ds$X)
# Note: `Y`, `X` are NOT defined, they are extracted from `data`.
|
||||||
|
dr.linesearch <- cve(Y ~ ., data, method = "linesearch", k = ncol(ds$B))
|
||||||
|
plot(dr.linesearch, main = "CVE M5 linesearch")
|
||||||
|
|
||||||
|
}
|
||||||
|
\references{
|
||||||
|
Fertl L., Bura E. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
|
||||||
|
}
|
||||||
|
\seealso{
|
||||||
|
\code{\link{formula}}. For a complete parameters list (dependent on
|
||||||
|
the method) see \code{\link{cve_simple}}, \code{\link{cve_sgd}}
|
||||||
|
}
|
|
@ -0,0 +1,19 @@
|
||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/gridSearch.R
|
||||||
|
\name{cve.grid.search}
|
||||||
|
\alias{cve.grid.search}
|
||||||
|
\title{Performs a grid search for parameters over a parameter grid.}
|
||||||
|
\usage{
|
||||||
|
cve.grid.search(X, Y, k, args)
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Performs a grid search for parameters over a parameter grid.
|
||||||
|
}
|
||||||
|
\examples{
|
||||||
|
args <- list(
|
||||||
|
h = c(0.05, 0.1, 0.2),
|
||||||
|
method = c("simple", "sgd"),
|
||||||
|
tau = c(0.5, 0.1, 0.01)
|
||||||
|
)
|
||||||
|
cve.grid.search(args)
|
||||||
|
}
|
|
@ -0,0 +1,16 @@
|
||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/cve_linesearch.R
|
||||||
|
\name{cve_linesearch}
|
||||||
|
\alias{cve_linesearch}
|
||||||
|
\title{Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe
|
||||||
|
conditions.}
|
||||||
|
\usage{
|
||||||
|
cve_linesearch(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 1,
|
||||||
|
tol = 0.001, rho1 = 0.1, rho2 = 0.9, slack = 0, epochs = 50L,
|
||||||
|
attempts = 10L, max.linesearch.iter = 10L, logger = NULL)
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe
|
||||||
|
conditions.
|
||||||
|
}
|
||||||
|
\keyword{internal}
|
|
@ -0,0 +1,16 @@
|
||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/cve_sgd.R
|
||||||
|
\name{cve_sgd}
|
||||||
|
\alias{cve_sgd}
|
||||||
|
\title{Implementation of the CVE method using stochastic gradient descent (SGD)
with mini batches.}
|
||||||
|
\usage{
|
||||||
|
cve_sgd(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 0.01,
|
||||||
|
tol = 0.001, epochs = 50L, batch.size = 16L, attempts = 10L,
|
||||||
|
logger = NULL)
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Implementation of the CVE method using stochastic gradient descent (SGD)
with mini batches.
|
||||||
|
}
|
||||||
|
\keyword{internal}
|
|
@ -0,0 +1,16 @@
|
||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/cve_simple.R
|
||||||
|
\name{cve_simple}
|
||||||
|
\alias{cve_simple}
|
||||||
|
\title{Simple implementation of the CVE method. 'Simple' means that this method is
|
||||||
|
a classic GD method using no further tricks.}
|
||||||
|
\usage{
|
||||||
|
cve_simple(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 1,
|
||||||
|
tol = 0.001, slack = 0, epochs = 50L, attempts = 10L,
|
||||||
|
logger = NULL)
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Simple implementation of the CVE method. 'Simple' means that this method is
|
||||||
|
a classic GD method using no further tricks.
|
||||||
|
}
|
||||||
|
\keyword{internal}
|
|
@ -0,0 +1,64 @@
|
||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/datasets.R
|
||||||
|
\name{dataset}
|
||||||
|
\alias{dataset}
|
||||||
|
\title{Generates test datasets.}
|
||||||
|
\usage{
|
||||||
|
dataset(name = "M1", n, B, p.mix = 0.3, lambda = 1)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{name}{One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"} or \code{"M5"}}
|
||||||
|
|
||||||
|
\item{n}{Number of samples.}
|
||||||
|
|
||||||
|
\item{p.mix}{Only for \code{"M4"}, see: below.}
|
||||||
|
|
||||||
|
\item{lambda}{Only for \code{"M4"}, see: below.}
|
||||||
|
|
||||||
|
\item{p}{Dim. of random variable \code{X}.}
|
||||||
|
}
|
||||||
|
\value{
|
||||||
|
List with elements
|
||||||
|
\itemize{
|
||||||
|
\item{X}{data}
|
||||||
|
\item{Y}{response}
|
||||||
|
\item{B}{Used dim-reduction matrix}
|
||||||
|
\item{name}{Name of the dataset (name parameter)}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Provides sample datasets. There are 5 different datasets named
|
||||||
|
M1, M2, M3, M4 and M5, described in the paper referenced below.
|
||||||
|
The general model is given by:
|
||||||
|
\deqn{Y ~ g(B'X) + \epsilon}
|
||||||
|
}
|
||||||
|
\section{M1}{
|
||||||
|
|
||||||
|
The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace
|
||||||
|
dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points.
|
||||||
|
The link function \eqn{g} is given as
|
||||||
|
\deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + 0.5\epsilon}{g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + 0.5 epsilon}
|
||||||
|
}
|
||||||
|
|
||||||
|
\section{M2}{
|
||||||
|
|
||||||
|
\eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a default of \eqn{n = 200} data points.
|
||||||
|
The link function \eqn{g} is given as
|
||||||
|
\deqn{g(x) = x_1 x_2^2 + 0.5\epsilon}{g(x) = x_1 x_2^2 + 0.5 epsilon}
|
||||||
|
}
|
||||||
|
|
||||||
|
\section{M3}{
|
||||||
|
|
||||||
|
TODO:
|
||||||
|
}
|
||||||
|
|
||||||
|
\section{M4}{
|
||||||
|
|
||||||
|
TODO:
|
||||||
|
}
|
||||||
|
|
||||||
|
\section{M5}{
|
||||||
|
|
||||||
|
TODO:
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,23 @@
|
||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/util.R
|
||||||
|
\name{elem.pairs}
|
||||||
|
\alias{elem.pairs}
|
||||||
|
\title{Creates a (numeric) matrix where each column contains
|
||||||
|
an element to element matching.}
|
||||||
|
\usage{
|
||||||
|
elem.pairs(elements)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{elements}{numeric vector of elements to match}
|
||||||
|
}
|
||||||
|
\value{
|
||||||
|
Matrix of size `(2, n * (n - 1) / 2)` for an argument of length `n`.
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Creates a (numeric) matrix where each column contains
|
||||||
|
an element to element matching.
|
||||||
|
}
|
||||||
|
\examples{
|
||||||
|
elem.pairs(seq.int(2, 5))
|
||||||
|
}
|
||||||
|
\keyword{internal}
|
|
@ -0,0 +1,28 @@
|
||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/estimateBandwidth.R
|
||||||
|
\name{estimate.bandwidth}
|
||||||
|
\alias{estimate.bandwidth}
|
||||||
|
\title{Estimated bandwidth for CVE.}
|
||||||
|
\usage{
|
||||||
|
estimate.bandwidth(X, k, nObs)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{X}{data matrix of dimension (n x p) with n data points X_i of
dimension p, i.e. each row represents a data point of dimension p.}
|
||||||
|
|
||||||
|
\item{k}{Guess for rank(B).}
|
||||||
|
|
||||||
|
\item{nObs}{Either a numeric or a function. If specified as a numeric value
it is used in the computation of the bandwidth directly. If it is a function,
`nObs` is evaluated as \code{nObs(nrow(x))}. The default behaviour if not
supplied at all is to use \code{nObs <- nrow(x)^0.5}.}
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Estimates a proper bandwidth \code{h} according to
|
||||||
|
\deqn{%
|
||||||
|
h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{%
|
||||||
|
h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p}
|
||||||
|
}
|
||||||
|
\seealso{
|
||||||
|
[\code{\link{qchisq}}]
|
||||||
|
}
|
|
@ -0,0 +1,31 @@
|
||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/gradient.R
|
||||||
|
\name{grad}
|
||||||
|
\alias{grad}
|
||||||
|
\title{Compute the gradient of `L(V)` given a dataset `X`.}
|
||||||
|
\usage{
|
||||||
|
grad(X, Y, V, h, loss.out = FALSE, loss.only = FALSE,
|
||||||
|
persistent = FALSE)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{X}{Data matrix.}
|
||||||
|
|
||||||
|
\item{Y}{Response.}
|
||||||
|
|
||||||
|
\item{V}{Position to compute the gradient at, i.e. a point on the Stiefel manifold.}
|
||||||
|
|
||||||
|
\item{h}{Bandwidth}
|
||||||
|
|
||||||
|
\item{loss.out}{If \code{TRUE} the loss is written to the parent environment.}
|
||||||
|
|
||||||
|
\item{loss.only}{Boolean; if \code{TRUE} only the loss is computed and
returned as a single value.}
|
||||||
|
|
||||||
|
\item{persistent}{Determines if data indices and dependent calculations shall
|
||||||
|
be reused from the parent environment. ATTENTION: Do NOT set this flag, only
|
||||||
|
intended for internal usage by carefully aligned functions!}
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Compute the gradient of `L(V)` given a dataset `X`.
|
||||||
|
}
|
||||||
|
\keyword{internal}
|
|
@ -0,0 +1,18 @@
|
||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/util.R
|
||||||
|
\name{null}
|
||||||
|
\alias{null}
|
||||||
|
\title{Null space basis of given matrix `V`}
|
||||||
|
\usage{
|
||||||
|
null(V)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{V}{`(p, q)` matrix}
|
||||||
|
}
|
||||||
|
\value{
|
||||||
|
Semi-orthogonal `(p, p - q)` matrix spanning the null space of `V`.
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Null space basis of given matrix `V`
|
||||||
|
}
|
||||||
|
\keyword{internal}
|
|
@ -0,0 +1,28 @@
|
||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/CVE.R
|
||||||
|
\name{plot.cve}
|
||||||
|
\alias{plot.cve}
|
||||||
|
\title{Plotting helper for objects of class \code{cve}.}
|
||||||
|
\usage{
|
||||||
|
## S3 method for class 'cve'
|
||||||
|
plot(x, content = "history", ...)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{x}{Object of class \code{cve} (result of [cve()]).}
|
||||||
|
|
||||||
|
\item{...}{Pass through parameters to [plot()] and [lines()]}
|
||||||
|
|
||||||
|
\item{content}{Specifies what to plot:
|
||||||
|
\itemize{
|
||||||
|
\item "history" Plots the loss history from stiefel optimization
|
||||||
|
(default).
|
||||||
|
\item ... TODO: add (if there are any)
|
||||||
|
}}
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Plotting helper for objects of class \code{cve}.
|
||||||
|
}
|
||||||
|
\seealso{
|
||||||
|
see \code{\link{par}} for graphical parameters to pass through
|
||||||
|
as well as \code{\link{plot}} for standard plot utility.
|
||||||
|
}
|
|
@ -0,0 +1,22 @@
|
||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/util.R
|
||||||
|
\name{rStiefl}
|
||||||
|
\alias{rStiefl}
|
||||||
|
\title{Samples uniformly from the Stiefel manifold}
|
||||||
|
\usage{
|
||||||
|
rStiefl(p, q)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{p}{row dim.}
|
||||||
|
|
||||||
|
\item{q}{col dim.}
|
||||||
|
}
|
||||||
|
\value{
|
||||||
|
`(p, q)` semi-orthogonal matrix
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Samples uniformly from the Stiefel manifold
|
||||||
|
}
|
||||||
|
\examples{
|
||||||
|
V <- rStiefl(6, 4)
|
||||||
|
}
|
|
@ -0,0 +1,14 @@
|
||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/CVE.R
|
||||||
|
\name{summary.cve}
|
||||||
|
\alias{summary.cve}
|
||||||
|
\title{Prints a summary of a \code{cve} result.}
|
||||||
|
\usage{
|
||||||
|
\method{summary}{cve}(object, ...)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{object}{Instance of 'cve' as return of \code{cve}.}
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Prints a summary of a \code{cve} result.
|
||||||
|
}
|
|
@ -0,0 +1,14 @@
|
||||||
|
|
||||||
|
## With R 3.1.0 or later, you can uncomment the following line to tell R to
|
||||||
|
## enable compilation with C++11 (where available)
|
||||||
|
##
|
||||||
|
## Also, OpenMP support in Armadillo prefers C++11 support. However, for wider
|
||||||
|
## availability of the package we do not yet enforce this here. It is however
|
||||||
|
## recommended for client packages to set it.
|
||||||
|
##
|
||||||
|
## And with R 3.4.0, and RcppArmadillo 0.7.960.*, we turn C++11 on as OpenMP
|
||||||
|
## support within Armadillo prefers / requires it
|
||||||
|
CXX_STD = CXX11
|
||||||
|
|
||||||
|
PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS)
|
||||||
|
PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS)
|
|
@ -0,0 +1,14 @@
|
||||||
|
|
||||||
|
## With R 3.1.0 or later, you can uncomment the following line to tell R to
|
||||||
|
## enable compilation with C++11 (where available)
|
||||||
|
##
|
||||||
|
## Also, OpenMP support in Armadillo prefers C++11 support. However, for wider
|
||||||
|
## availability of the package we do not yet enforce this here. It is however
|
||||||
|
## recommended for client packages to set it.
|
||||||
|
##
|
||||||
|
## And with R 3.4.0, and RcppArmadillo 0.7.960.*, we turn C++11 on as OpenMP
|
||||||
|
## support within Armadillo prefers / requires it
|
||||||
|
CXX_STD = CXX11
|
||||||
|
|
||||||
|
PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS)
|
||||||
|
PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS)
|
|
@ -0,0 +1,7 @@
|
||||||
|
#ifndef CVE_INCLUDE_GUARD_CONFIG_
|
||||||
|
#define CVE_INCLUDE_GUARD_CONFIG_
|
||||||
|
|
||||||
|
#define CVE_MEM_CHUNK_SIZE 2032
|
||||||
|
#define CVE_MEM_CHUNK_SMALL 1016
|
||||||
|
|
||||||
|
#endif /* CVE_INCLUDE_GUARD_CONFIG_ */
|
|
@ -0,0 +1,29 @@
|
||||||
|
#include <Rinternals.h>
|
||||||
|
|
||||||
|
void grad(const int n, const int p, const int q,
|
||||||
|
const double *X,
|
||||||
|
const double *X_diff,
|
||||||
|
const double *Y,
|
||||||
|
const double *V,
|
||||||
|
const double h,
|
||||||
|
double *G, double *loss);
|
||||||
|
|
||||||
|
SEXP grad_c(SEXP X, SEXP X_diff, SEXP Y, SEXP V, SEXP h) {
|
||||||
|
SEXP G = PROTECT(allocMatrix(REALSXP, nrows(V), ncols(V)));
|
||||||
|
SEXP loss = PROTECT(ScalarReal(0.0));
|
||||||
|
|
||||||
|
grad(nrows(X), ncols(X), ncols(V),
|
||||||
|
REAL(X), REAL(X_diff), REAL(Y), REAL(V), *REAL(h),
|
||||||
|
REAL(G), REAL(loss));
|
||||||
|
|
||||||
|
SEXP out = PROTECT(allocVector(VECSXP, 2));
|
||||||
|
SET_VECTOR_ELT(out, 0, G);
|
||||||
|
SET_VECTOR_ELT(out, 1, loss);
|
||||||
|
SEXP names = PROTECT(allocVector(STRSXP, 2));
|
||||||
|
SET_STRING_ELT(names, 0, mkChar("G"));
|
||||||
|
SET_STRING_ELT(names, 1, mkChar("loss"));
|
||||||
|
setAttrib(out, install("names"), names);
|
||||||
|
|
||||||
|
UNPROTECT(4);
|
||||||
|
return out;
|
||||||
|
}
|
|
@ -0,0 +1,123 @@
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
#include "sums.h"
|
||||||
|
#include "sweep.h"
|
||||||
|
#include "matrix.h"
|
||||||
|
#include "indexing.h"
|
||||||
|
|
||||||
|
// TODO: clarify
|
||||||
|
static inline double gaussKernel(const double x, const double scale) {
|
||||||
|
return exp(scale * x * x);
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: much potential for optimization!!!
|
||||||
|
static void weightedYandLoss(const int n,
|
||||||
|
const double *Y,
|
||||||
|
const double *vecD,
|
||||||
|
const double *vecW,
|
||||||
|
const double *colSums,
|
||||||
|
double *y1, double *L, double *vecS,
|
||||||
|
double *loss) {
|
||||||
|
int i, j, k, N = n * (n - 1) / 2;
|
||||||
|
double l;
|
||||||
|
|
||||||
|
for (i = 0; i < n; ++i) {
|
||||||
|
y1[i] = Y[i] / colSums[i];
|
||||||
|
L[i] = Y[i] * Y[i] / colSums[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
for (k = j = 0; j < n; ++j) {
|
||||||
|
for (i = j + 1; i < n; ++i, ++k) {
|
||||||
|
y1[i] += Y[j] * vecW[k] / colSums[i];
|
||||||
|
y1[j] += Y[i] * vecW[k] / colSums[j];
|
||||||
|
L[i] += Y[j] * Y[j] * vecW[k] / colSums[i];
|
||||||
|
L[j] += Y[i] * Y[i] * vecW[k] / colSums[j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
l = 0.0;
|
||||||
|
for (i = 0; i < n; ++i) {
|
||||||
|
l += (L[i] -= y1[i] * y1[i]);
|
||||||
|
}
|
||||||
|
*loss = l / (double)n;
|
||||||
|
|
||||||
|
for (k = j = 0; j < n; ++j) {
|
||||||
|
for (i = j + 1; i < n; ++i, ++k) {
|
||||||
|
l = Y[j] - y1[i];
|
||||||
|
vecS[k] = (L[i] - (l * l)) / colSums[i];
|
||||||
|
l = Y[i] - y1[j];
|
||||||
|
vecS[k] += (L[j] - (l * l)) / colSums[j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (k = 0; k < N; ++k) {
|
||||||
|
vecS[k] *= vecW[k] * vecD[k];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void grad(const int n, const int p, const int q,
|
||||||
|
const double *X,
|
||||||
|
const double *X_diff,
|
||||||
|
const double *Y,
|
||||||
|
const double *V,
|
||||||
|
const double h,
|
||||||
|
double *G, double *loss) {
|
||||||
|
    // Number of non-trivial (X_i, X_j) pairs.
|
||||||
|
int i, N = (n * (n - 1)) / 2;
|
||||||
|
double scale = -0.5 / h;
|
||||||
|
|
||||||
|
if (X_diff == (void*)0) {
|
||||||
|
// TODO: ...
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allocate and compute projection matrix `Q = I_p - V * V^T`
|
||||||
|
double *Q = (double*)malloc(p * p * sizeof(double));
|
||||||
|
nullProj(V, p, q, Q);
|
||||||
|
|
||||||
|
// allocate and compute vectorized distance matrix with a temporary
|
||||||
|
// projection of `X_diff`.
|
||||||
|
double *vecD = (double*)malloc(N * sizeof(double));
|
||||||
|
double *X_proj;
|
||||||
|
if (p < 5) { // TODO: refine that!
|
||||||
|
X_proj = (double*)malloc(N * 5 * sizeof(double));
|
||||||
|
} else {
|
||||||
|
X_proj = (double*)malloc(N * p * sizeof(double));
|
||||||
|
}
|
||||||
|
matrixprod(X_diff, N, p, Q, p, p, X_proj);
|
||||||
|
rowSquareSums(X_proj, N, p, vecD);
|
||||||
|
|
||||||
|
// Apply kernel to distence vector for weights computation.
|
||||||
|
double *vecW = X_proj; // reuse memory area, no longer needed.
|
||||||
|
for (i = 0; i < N; ++i) {
|
||||||
|
vecW[i] = gaussKernel(vecD[i], scale);
|
||||||
|
}
|
||||||
|
double *colSums = X_proj + N; // still allocated!
|
||||||
|
rowSumsSymVec(vecW, n, 1.0, colSums); // rowSums = colSums cause Sym
|
||||||
|
|
||||||
|
// compute weighted responces of first end second momontum, aka y1, y2.
|
||||||
|
double *y1 = X_proj + N + n;
|
||||||
|
double *L = X_proj + N + (2 * n);
|
||||||
|
// Allocate X_diff scaling vector `vecS`, not in `X_proj` mem area because
|
||||||
|
// used symultanious to X_proj in final gradient computation.
|
||||||
|
double *vecS = (double*)malloc(N * sizeof(double));
|
||||||
|
weightedYandLoss(n, Y, vecD, vecW, colSums, y1, L, vecS, loss);
|
||||||
|
|
||||||
|
// compute the gradient using X_proj for intermidiate scaled X_diff.
|
||||||
|
rowSweep(X_diff, N, p, "*", vecS, X_proj);
|
||||||
|
// reuse Q which has the required dim (p, p).
|
||||||
|
crossprod(X_diff, N, p, X_proj, N, p, Q);
|
||||||
|
// Product with V
|
||||||
|
matrixprod(Q, p, p, V, p, q, G);
|
||||||
|
// And final scaling (TODO: move into matrixprod!)
|
||||||
|
scale = -2.0 / (((double)n) * h * h);
|
||||||
|
N = p * q;
|
||||||
|
for (i = 0; i < N; ++i) {
|
||||||
|
G[i] *= scale;
|
||||||
|
}
|
||||||
|
|
||||||
|
free(vecS);
|
||||||
|
free(X_proj);
|
||||||
|
free(vecD);
|
||||||
|
free(Q);
|
||||||
|
}
|
|
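The tail of `grad` assembles the gradient from the row-scaled pairwise differences. In R notation this is roughly the sketch below; the names are illustrative, `X_diff` is the N x p matrix of pairwise differences and `vecS` the scaling vector produced by `weightedYandLoss`.

```R
## R sketch of the final gradient assembly in `grad`.
grad.assemble <- function(X_diff, vecS, V, h, n) {
    G <- crossprod(X_diff, X_diff * vecS) %*% V  # t(X_diff) %*% (row-scaled X_diff) %*% V
    G * (-2 / (n * h^2))                         # final scaling, as in the C code
}
```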
@@ -0,0 +1,12 @@
#include "indexing.h"

void rangePairs(const int from, const int to, int *pairs) {
    int i, j, k;

    for (k = 0, i = from; i < to; ++i) {
        for (j = i + 1; j < to; ++j, k += 2) {
            pairs[k] = i;
            pairs[k + 1] = j;
        }
    }
}
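`rangePairs` enumerates the 0-based index pairs (i, j) with i < j in the same order as the nested loops in `weightedYandLoss`; in R the same ordering is what `combn` produces for 1-based indices, which gives an easy cross-check:

```R
## R counterpart of `rangePairs(0, n, pairs)`, up to the 0- vs 1-based offset.
n <- 4
pairs <- combn(n, 2)  # pairs (1,2), (1,3), ..., (2,3), ... column by column
t(pairs) - 1L         # the same pairs, 0-based, one pair per row
```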
@@ -0,0 +1,8 @@

/* Include Guard */
#ifndef CVE_INCLUDE_GUARD_INDEXING_
#define CVE_INCLUDE_GUARD_INDEXING_

void rangePairs(const int from, const int to, int *pairs);

#endif /* CVE_INCLUDE_GUARD_INDEXING_ */
@@ -0,0 +1,23 @@
#include <R.h>
#include <Rinternals.h>
#include <stdlib.h> // for NULL
#include <R_ext/Rdynload.h>

/* FIXME:
   Check these declarations against the C/Fortran source code.
*/

/* .Call calls */
extern SEXP grad_c(SEXP, SEXP, SEXP, SEXP, SEXP);

static const R_CallMethodDef CallEntries[] = {
    {"grad_c", (DL_FUNC) &grad_c, 5},
    {NULL, NULL, 0}
};

/* Restrict C entry points to registered routines. */
void R_init_CVE(DllInfo *dll)
{
    R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
    R_useDynamicSymbols(dll, FALSE);
}
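Once the package's shared object is loaded, the effect of this registration can be inspected from R. The snippet below is an assumed check of the intended workflow, not documented package behaviour.

```R
## Assumed sanity check after `library(CVE)` (or `devtools::load_all()`):
getDLLRegisteredRoutines("CVE")$.Call  # should list `grad_c` with 5 arguments
is.loaded("grad_c")                    # expected TRUE once the DLL is loaded
```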
@@ -0,0 +1,71 @@
#include <string.h> // for `mem*` functions.

#include "config.h"
#include "matrix.h"

#include <R_ext/BLAS.h>

void matrixprod(const double *A, const int nrowA, const int ncolA,
                const double *B, const int nrowB, const int ncolB,
                double *C) {
    const double one = 1.0;
    const double zero = 0.0;

    // DGEMM with parameterization:
    //      C <- A %*% B
    F77_NAME(dgemm)("N", "N", &nrowA, &ncolB, &ncolA,
                    &one, A, &nrowA, B, &nrowB,
                    &zero, C, &nrowA);
}

void crossprod(const double *A, const int nrowA, const int ncolA,
               const double *B, const int nrowB, const int ncolB,
               double *C) {
    const double one = 1.0;
    const double zero = 0.0;

    // DGEMM with parameterization:
    //      C <- t(A) %*% B
    F77_NAME(dgemm)("T", "N", &ncolA, &ncolB, &nrowA,
                    &one, A, &nrowA, B, &nrowB,
                    &zero, C, &ncolA);
}

void nullProj(const double *B, const int nrowB, const int ncolB,
              double *Q) {
    const double minusOne = -1.0;
    const double one = 1.0;

    // Initialize as identity matrix.
    memset(Q, 0, sizeof(double) * nrowB * nrowB);
    double *Q_diag, *Q_end = Q + nrowB * nrowB;
    for (Q_diag = Q; Q_diag < Q_end; Q_diag += nrowB + 1) {
        *Q_diag = 1.0;
    }

    // DGEMM with parameterization:
    //      C <- (-1.0 * B %*% t(B)) + C
    F77_NAME(dgemm)("N", "T", &nrowB, &nrowB, &ncolB,
                    &minusOne, B, &nrowB, B, &nrowB,
                    &one, Q, &nrowB);
}

// A dense skew-symmetric rank 2 update.
// Performs the update
//      C := alpha (A * B^T - B * A^T) + beta C
void skewSymRank2k(const int nrow, const int ncol,
                   double alpha, const double *A, const double *B,
                   double beta,
                   double *C) {
    F77_NAME(dgemm)("N", "T",
                    &nrow, &nrow, &ncol,
                    &alpha, A, &nrow, B, &nrow,
                    &beta, C, &nrow);

    alpha *= -1.0;
    beta = 1.0;
    F77_NAME(dgemm)("N", "T",
                    &nrow, &nrow, &ncol,
                    &alpha, B, &nrow, A, &nrow,
                    &beta, C, &nrow);
}
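Each of these thin BLAS wrappers has a one- or two-line R counterpart, which is convenient for unit testing the C code. The sketch below is a testing aid only; the function names with the `.R` suffix are made up for illustration.

```R
## R reference implementations of the BLAS wrappers above (testing aids).
matrixprod.R <- function(A, B) A %*% B                  # dgemm "N", "N"
crossprod.R  <- function(A, B) crossprod(A, B)          # t(A) %*% B, dgemm "T", "N"
nullProj.R   <- function(B) diag(nrow(B)) - B %*% t(B)  # Q = I - B B^T
skewSymRank2k.R <- function(A, B, C, alpha = 1, beta = 0) {
    alpha * (A %*% t(B) - B %*% t(A)) + beta * C
}
```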
@@ -0,0 +1,25 @@

/* Include Guard */
#ifndef CVE_INCLUDE_GUARD_MATRIX_
#define CVE_INCLUDE_GUARD_MATRIX_

void matrixprod(const double *A, const int nrowA, const int ncolA,
                const double *B, const int nrowB, const int ncolB,
                double *C);

void crossprod(const double *A, const int nrowA, const int ncolA,
               const double *B, const int nrowB, const int ncolB,
               double *C);

void nullProj(const double *B, const int nrowB, const int ncolB,
              double *Q);

// A dense skew-symmetric rank 2 update.
// Performs the update
//      C := alpha (A * B^T - B * A^T) + beta C
void skewSymRank2k(const int nrow, const int ncol,
                   double alpha, const double *A, const double *B,
                   double beta,
                   double *C);

#endif /* CVE_INCLUDE_GUARD_MATRIX_ */
@@ -0,0 +1,113 @@
#include <string.h> // for `mem*` functions.

#include "config.h"
#include "sums.h"

void rowSums(const double *A, const int nrow, const int ncol,
             double *sum) {
    int i, j, block_size, block_size_i;
    const double *A_block = A;
    const double *A_end = A + nrow * ncol;

    if (nrow > CVE_MEM_CHUNK_SIZE) {
        block_size = CVE_MEM_CHUNK_SIZE;
    } else {
        block_size = nrow;
    }

    // Iterate `(block_size_i, ncol)` submatrix blocks.
    for (i = 0; i < nrow; i += block_size_i) {
        // Reset `A` to the new block beginning.
        A = A_block;
        // Take the block size of everything left and reduce to the max size.
        block_size_i = nrow - i;
        if (block_size_i > block_size) {
            block_size_i = block_size;
        }
        // Compute the block's first column,
        for (j = 0; j < block_size_i; ++j) {
            sum[j] = A[j];
        }
        // and sum the following columns onto the first one.
        for (A += nrow; A < A_end; A += nrow) {
            for (j = 0; j < block_size_i; ++j) {
                sum[j] += A[j];
            }
        }
        // Step one block forward.
        A_block += block_size_i;
        sum += block_size_i;
    }
}

void colSums(const double *A, const int nrow, const int ncol,
             double *sum) {
    int j;
    double *sum_end = sum + ncol;

    memset(sum, 0, sizeof(double) * ncol);
    for (; sum < sum_end; ++sum) {
        for (j = 0; j < nrow; ++j) {
            *sum += A[j];
        }
        A += nrow;
    }
}

void rowSquareSums(const double *A, const int nrow, const int ncol,
                   double *sum) {
    int i, j, block_size, block_size_i;
    const double *A_block = A;
    const double *A_end = A + nrow * ncol;

    if (nrow < CVE_MEM_CHUNK_SIZE) {
        block_size = nrow;
    } else {
        block_size = CVE_MEM_CHUNK_SIZE;
    }

    // Iterate `(block_size_i, ncol)` submatrix blocks.
    for (i = 0; i < nrow; i += block_size_i) {
        // Reset `A` to the new block beginning.
        A = A_block;
        // Take the block size of everything left and reduce to the max size.
        block_size_i = nrow - i;
        if (block_size_i > block_size) {
            block_size_i = block_size;
        }
        // Compute the block's first column,
        for (j = 0; j < block_size_i; ++j) {
            sum[j] = A[j] * A[j];
        }
        // and sum the following columns onto the first one.
        for (A += nrow; A < A_end; A += nrow) {
            for (j = 0; j < block_size_i; ++j) {
                sum[j] += A[j] * A[j];
            }
        }
        // Step one block forward.
        A_block += block_size_i;
        sum += block_size_i;
    }
}

void rowSumsSymVec(const double *Avec, const int nrow,
                   const double diag,
                   double *sum) {
    int i, j;

    if (diag == 0.0) {
        memset(sum, 0, nrow * sizeof(double));
    } else {
        for (i = 0; i < nrow; ++i) {
            sum[i] = diag;
        }
    }

    for (j = 0; j < nrow; ++j) {
        for (i = j + 1; i < nrow; ++i, ++Avec) {
            sum[j] += *Avec;
            sum[i] += *Avec;
        }
    }
}
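For testing, these routines map onto base R directly; only `rowSumsSymVec` needs care, since `Avec` is assumed to hold the strict lower triangle of a symmetric matrix in column-major order, which is exactly the order the final loop above consumes.

```R
## R reference implementations of the sum routines above (testing aids).
rowSums.R       <- function(A) rowSums(A)
colSums.R       <- function(A) colSums(A)
rowSquareSums.R <- function(A) rowSums(A^2)

rowSumsSymVec.R <- function(Avec, n, d) {
    A <- matrix(0, n, n)
    A[lower.tri(A)] <- Avec  # strict lower triangle, column-major
    A <- A + t(A)
    diag(A) <- d
    rowSums(A)
}
```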
@@ -0,0 +1,19 @@

/* Include Guard */
#ifndef CVE_INCLUDE_GUARD_SUMS_
#define CVE_INCLUDE_GUARD_SUMS_

void rowSums(const double *A, const int nrow, const int ncol,
             double *sum);

void colSums(const double *A, const int nrow, const int ncol,
             double *sum);

void rowSquareSums(const double *A, const int nrow, const int ncol,
                   double *sum);

void rowSumsSymVec(const double *Avec, const int nrow,
                   const double diag,
                   double *sum);

#endif /* CVE_INCLUDE_GUARD_SUMS_ */
@@ -0,0 +1,113 @@
#include <R_ext/Error.h> // for `error`.

#include "config.h"
#include "sweep.h"

/* C[, j] = A[, j] op v for each j = 1 to ncol */
void rowSweep(const double *A, const int nrow, const int ncol,
              const char* op,
              const double *v, // vector of length nrow
              double *C) {
    int i, j, block_size, block_size_i;
    const double *A_block = A;
    double *C_block = C;
    const double *A_end = A + nrow * ncol;

    if (nrow > CVE_MEM_CHUNK_SMALL) { // small because 3 vectors in cache
        block_size = CVE_MEM_CHUNK_SMALL;
    } else {
        block_size = nrow;
    }

    if (*op == '+') {
        // Iterate `(block_size_i, ncol)` submatrix blocks.
        for (i = 0; i < nrow; i += block_size_i) {
            // Set `A` and `C` to the block beginning.
            A = A_block;
            C = C_block;
            // Get the current block's row size.
            block_size_i = nrow - i;
            if (block_size_i > block_size) {
                block_size_i = block_size;
            }
            // Perform the element-wise operation for the block.
            for (; A < A_end; A += nrow, C += nrow) {
                for (j = 0; j < block_size_i; ++j) {
                    C[j] = A[j] + v[j]; // FUN = '+'
                }
            }
            // Step one block forward.
            A_block += block_size_i;
            C_block += block_size_i;
            v += block_size_i;
        }
    } else if (*op == '-') {
        // Iterate `(block_size_i, ncol)` submatrix blocks.
        for (i = 0; i < nrow; i += block_size_i) {
            // Set `A` and `C` to the block beginning.
            A = A_block;
            C = C_block;
            // Get the current block's row size.
            block_size_i = nrow - i;
            if (block_size_i > block_size) {
                block_size_i = block_size;
            }
            // Perform the element-wise operation for the block.
            for (; A < A_end; A += nrow, C += nrow) {
                for (j = 0; j < block_size_i; ++j) {
                    C[j] = A[j] - v[j]; // FUN = '-'
                }
            }
            // Step one block forward.
            A_block += block_size_i;
            C_block += block_size_i;
            v += block_size_i;
        }
    } else if (*op == '*') {
        // Iterate `(block_size_i, ncol)` submatrix blocks.
        for (i = 0; i < nrow; i += block_size_i) {
            // Set `A` and `C` to the block beginning.
            A = A_block;
            C = C_block;
            // Get the current block's row size.
            block_size_i = nrow - i;
            if (block_size_i > block_size) {
                block_size_i = block_size;
            }
            // Perform the element-wise operation for the block.
            for (; A < A_end; A += nrow, C += nrow) {
                for (j = 0; j < block_size_i; ++j) {
                    C[j] = A[j] * v[j]; // FUN = '*'
                }
            }
            // Step one block forward.
            A_block += block_size_i;
            C_block += block_size_i;
            v += block_size_i;
        }
    } else if (*op == '/') {
        // Iterate `(block_size_i, ncol)` submatrix blocks.
        for (i = 0; i < nrow; i += block_size_i) {
            // Set `A` and `C` to the block beginning.
            A = A_block;
            C = C_block;
            // Get the current block's row size.
            block_size_i = nrow - i;
            if (block_size_i > block_size) {
                block_size_i = block_size;
            }
            // Perform the element-wise operation for the block.
            for (; A < A_end; A += nrow, C += nrow) {
                for (j = 0; j < block_size_i; ++j) {
                    C[j] = A[j] / v[j]; // FUN = '/'
                }
            }
            // Step one block forward.
            A_block += block_size_i;
            C_block += block_size_i;
            v += block_size_i;
        }
    } else {
        error("Got unknown 'op' (operation) argument.");
    }
}
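`rowSweep` is essentially base R's `sweep` over the row margin; a minimal reference for comparison, with an illustrative name:

```R
## R counterpart of `rowSweep`: C[, j] = A[, j] op v, with v of length nrow(A).
rowSweep.R <- function(A, v, op = "*") sweep(A, 1, v, FUN = op)

A <- matrix(1:6, 3, 2)
rowSweep.R(A, c(1, 10, 100), "*")
```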
@@ -0,0 +1,11 @@

/* Include Guard */
#ifndef CVE_INCLUDE_GUARD_SWEEP_
#define CVE_INCLUDE_GUARD_SWEEP_

void rowSweep(const double *A, const int nrow, const int ncol,
              const char* op,
              const double *v, // vector of length nrow
              double *C);

#endif /* CVE_INCLUDE_GUARD_SWEEP_ */
notes.md
@@ -6,6 +6,35 @@ grep --include=*\.{c,h,R} -rnw '.' -e "sweep"
```
searches in all `C` source and header files as well as `R` source files for the term _sweep_.
+
+## Recursive directory comparison with colored structure (more or less).
+```bash
+diff -r CVE_R/ CVE_C/ | grep -E "^([<>]|[^<>].*)"
+```
+
+## Parsing `bash` script parameters.
+```bash
+usage="$0 [-v|--verbose] [-n|--dry-run] [(-s|--stack-size) <size>] [-h|--help] [-- [p1, [p2, ...]]]"
+verbose=false
+help=false
+dry_run=false
+stack_size=0
+
+while [ $# -gt 0 ]; do
+    case "$1" in
+        -v | --verbose )    verbose=true; shift ;;
+        -n | --dry-run )    dry_run=true; shift ;;
+        -s | --stack-size ) stack_size="$2"; shift; shift ;;
+        -h | --help )       echo $usage; exit ;;       # On help print usage and exit.
+        -- )                shift; break ;;            # Stop parameter "parsing".
+        * )                 echo $usage >&2; exit 1 ;; # Print usage and exit with failure.
+    esac
+done
+
+echo verbose=$verbose
+echo dry_run=$dry_run
+echo stack_size=$stack_size
+```

# Development
## Build and install.
To build the package the `devtools` package is used. This also provides `roxygen2`, which is used for documentation and automatic creation of the `NAMESPACE` file.
wip.R
@@ -118,6 +118,17 @@ microbenchmark(
)

## Matrix-Matrix operation .Call ---------------------------------------------
+transpose.c <- function(A) {
+    stopifnot(
+        is.matrix(A), is.numeric(A)
+    )
+    if (!is.double(A)) {
+        A <- matrix(as.double(A), nrow(A), ncol(A))
+    }
+
+    .Call('R_transpose', PACKAGE = 'wip', A)
+}
+
matrixprod.c <- function(A, B) {
    stopifnot(
        is.matrix(A), is.numeric(A),
@@ -174,6 +185,14 @@ m <- 300

A <- matrix(runif(n * k), n, k)
B <- matrix(runif(k * m), k, m)
+stopifnot(
+    all.equal(t(A), transpose.c(A))
+)
+microbenchmark(
+    t(A),
+    transpose.c(A)
+)
+
stopifnot(
    all.equal(A %*% B, matrixprod.c(A, B))
)
@@ -326,4 +345,4 @@ stopifnot(all.equal(
microbenchmark(
    grad = grad(X, Y, V, h),
    gradient.c = gradient.c(X, X_diff, Y, V, h)
)
wip.c
@@ -1,4 +1,5 @@
#include <stdlib.h>
+#include <string.h> // for `mem*` functions.

#include <R_ext/BLAS.h>
#include <R_ext/Lapack.h>
@@ -99,15 +100,15 @@ static inline void rowSquareSums(const double *A,
}

static inline void rowSumsSymVec(const double *Avec, const int nrow,
-                                 const double *diag,
+                                 const double diag,
                                  double *sum) {
    int i, j;

-    if (*diag == 0.0) {
+    if (diag == 0.0) {
        memset(sum, 0, nrow * sizeof(double));
    } else {
        for (i = 0; i < nrow; ++i) {
-            sum[i] = *diag;
+            sum[i] = diag;
        }
    }

@@ -228,6 +229,18 @@ static void rowSweep(const double *A, const int nrow, const int ncol,
    }
}

+void transpose(const double *A, const int nrow, const int ncol, double* T) {
+    int i, j, len = nrow * ncol;
+
+    // Filling column-wise and accessing row-wise.
+    for (i = 0, j = 0; i < len; ++i, j += nrow) {
+        if (j >= len) {
+            j -= len - 1;
+        }
+        T[i] = A[j];
+    }
+}
+
static inline void matrixprod(const double *A, const int nrowA, const int ncolA,
                              const double *B, const int nrowB, const int ncolB,
                              double *C) {
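The index walk of the new `transpose` can be mirrored one-to-one in R, which makes the `t(A)` comparison added in `wip.R` easy to reason about. The sketch below is illustrative only.

```R
## R mirror of the C `transpose`: fill the result linearly while reading the
## input row-wise (both matrices stored column-major).
transpose.R <- function(A) {
    n <- nrow(A); len <- length(A)
    a <- as.vector(A)
    out <- numeric(len)
    j <- 0L
    for (i in seq_len(len)) {
        if (j >= len) j <- j - (len - 1L)
        out[i] <- a[j + 1L]
        j <- j + n
    }
    matrix(out, ncol(A), nrow(A))
}

A <- matrix(rnorm(6), 2, 3)
all.equal(t(A), transpose.R(A))  # TRUE
```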
@@ -363,7 +376,6 @@ static void gradient(const int n, const int p, const int q,
    // Number of non-trivial X_i to X_j pairs.
    int i, N = (n * (n - 1)) / 2;
    double scale = -0.5 / h;
-    const double one = 1.0;

    if (X_diff == (void*)0) {
        // TODO: ...
@@ -391,7 +403,7 @@ static void gradient(const int n, const int p, const int q,
        vecW[i] = gaussKernel(vecD[i], scale);
    }
    double *colSums = X_proj + N; // still allocated!
-    rowSumsSymVec(vecW, n, &one, colSums); // rowSums = colSums because the matrix is symmetric.
+    rowSumsSymVec(vecW, n, 1.0, colSums); // rowSums = colSums because the matrix is symmetric.

    // Compute the weighted responses of the first and second moments, aka y1, y2.
    double *y1 = X_proj + N + n;

wip.h
@@ -1,5 +1,5 @@
-#ifndef _CVE_INCLUDE_GUARD_
+#ifndef CVE_INCLUDE_GUARD_
-#define _CVE_INCLUDE_GUARD_
+#define CVE_INCLUDE_GUARD_

#include <Rinternals.h>

@@ -41,12 +41,12 @@ SEXP R_rowSquareSums(SEXP A) {
}

static inline void rowSumsSymVec(const double *Avec, const int nrow,
-                                 const double *diag,
+                                 const double diag,
                                  double *sum);
SEXP R_rowSumsSymVec(SEXP Avec, SEXP nrow, SEXP diag) {
    SEXP sum = PROTECT(allocVector(REALSXP, *INTEGER(nrow)));

-    rowSumsSymVec(REAL(Avec), *INTEGER(nrow), REAL(diag), REAL(sum));
+    rowSumsSymVec(REAL(Avec), *INTEGER(nrow), *REAL(diag), REAL(sum));

    UNPROTECT(1);
    return sum;
@@ -67,6 +67,16 @@ SEXP R_rowSweep(SEXP A, SEXP v, SEXP op) {
    return C;
}

+void transpose(const double *A, const int nrow, const int ncol, double* T);
+SEXP R_transpose(SEXP A) {
+    SEXP T = PROTECT(allocMatrix(REALSXP, ncols(A), nrows(A)));
+
+    transpose(REAL(A), nrows(A), ncols(A), REAL(T));
+
+    UNPROTECT(1); /* T */
+    return T;
+}
+
static inline void matrixprod(const double *A, const int nrowA, const int ncolA,
                              const double *B, const int nrowB, const int ncolB,
                              double *C);
@@ -156,4 +166,4 @@ SEXP R_gradient(SEXP X, SEXP X_diff, SEXP Y, SEXP V, SEXP h) {
    return G;
}

-#endif /* _CVE_INCLUDE_GUARD_ */
+#endif /* CVE_INCLUDE_GUARD_ */