diff --git a/CVE_C/DESCRIPTION b/CVE_C/DESCRIPTION new file mode 100644 index 0000000..988b3d8 --- /dev/null +++ b/CVE_C/DESCRIPTION @@ -0,0 +1,11 @@ +Package: CVE +Type: Package +Title: Conditional Variance Estimator for Sufficient Dimension Reduction +Version: 0.1 +Date: 2019-08-29 +Author: Loki +Maintainer: Loki +Description: Implementation of the Conditional Variance Estimation (CVE) method. This package version is writen in pure R. +License: GPL-3 +Encoding: UTF-8 +RoxygenNote: 6.1.1 diff --git a/CVE_C/NAMESPACE b/CVE_C/NAMESPACE new file mode 100644 index 0000000..65deac3 --- /dev/null +++ b/CVE_C/NAMESPACE @@ -0,0 +1,24 @@ +# Generated by roxygen2: do not edit by hand + +S3method(plot,cve) +S3method(summary,cve) +export(cve) +export(cve.call) +export(cve.grid.search) +export(cve_linesearch) +export(cve_sgd) +export(cve_simple) +export(dataset) +export(elem.pairs) +export(estimate.bandwidth) +export(grad) +export(null) +export(rStiefl) +import(stats) +importFrom(graphics,lines) +importFrom(graphics,plot) +importFrom(graphics,points) +importFrom(stats,model.frame) +importFrom(stats,rbinom) +importFrom(stats,rnorm) +useDynLib(CVE, .registration = TRUE) diff --git a/CVE_C/R/CVE.R b/CVE_C/R/CVE.R new file mode 100644 index 0000000..f32f9a6 --- /dev/null +++ b/CVE_C/R/CVE.R @@ -0,0 +1,223 @@ +#' Conditional Variance Estimator (CVE) +#' +#' Conditional Variance Estimator for Sufficient Dimension +#' Reduction +#' +#' TODO: And some details +#' +#' +#' @references Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019 +#' +#' @docType package +#' @author Loki +#' @useDynLib CVE, .registration = TRUE +"_PACKAGE" + +#' Implementation of the CVE method. +#' +#' Conditional Variance Estimator (CVE) is a novel sufficient dimension +#' reduction (SDR) method assuming a model +#' \deqn{Y \sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon} +#' where B'X is a lower dimensional projection of the predictors. +#' +#' @param formula Formel for the regression model defining `X`, `Y`. +#' See: \code{\link{formula}}. +#' @param data data.frame holding data for formula. +#' @param method The different only differe in the used optimization. +#' All of them are Gradient based optimization on a Stiefel manifold. +#' \itemize{ +#' \item "simple" Simple reduction of stepsize. +#' \item "sgd" stocastic gradient decent. +#' \item TODO: further +#' } +#' @param ... Further parameters depending on the used method. +#' @examples +#' library(CVE) +#' +#' # sample dataset +#' ds <- dataset("M5") +#' +#' # call ´cve´ with default method (aka "simple") +#' dr.simple <- cve(ds$Y ~ ds$X, k = ncol(ds$B)) +#' # plot optimization history (loss via iteration) +#' plot(dr.simple, main = "CVE M5 simple") +#' +#' # call ´cve´ with method "linesearch" using ´data.frame´ as data. +#' data <- data.frame(Y = ds$Y, X = ds$X) +#' # Note: ´Y, X´ are NOT defined, they are extracted from ´data´. +#' dr.linesearch <- cve(Y ~ ., data, method = "linesearch", k = ncol(ds$B)) +#' plot(dr.linesearch, main = "CVE M5 linesearch") +#' +#' @references Fertl L., Bura E. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019 +#' +#' @seealso \code{\link{formula}}. For a complete parameters list (dependent on +#' the method) see \code{\link{cve_simple}}, \code{\link{cve_sgd}} +#' @import stats +#' @importFrom stats model.frame +#' @export +cve <- function(formula, data, method = "simple", max.dim = 10, ...) 
{ + # check for type of `data` if supplied and set default + if (missing(data)) { + data <- environment(formula) + } else if (!is.data.frame(data)) { + stop('Parameter `data` must be a `data.frame` or missing.') + } + + # extract `X`, `Y` from `formula` with `data` + model <- stats::model.frame(formula, data) + X <- as.matrix(model[,-1, drop = FALSE]) + Y <- as.matrix(model[, 1, drop = FALSE]) + + # pass extracted data on to [cve.call()] + dr <- cve.call(X, Y, method = method, ...) + + # overwrite `call` property from [cve.call()] + dr$call <- match.call() + return(dr) +} + +#' @param nObs as describet in the Paper. +#' @param X Data +#' @param Y Responces +#' @param nObs Like in the paper. +#' @param k guess for SDR dimension. +#' @param ... Method specific parameters. +#' @rdname cve +#' @export +cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5, + min.dim = 1, max.dim = 10, k, ...) { + + # parameter checking + if (!is.matrix(X)) { + stop('X should be a matrices.') + } + if (is.matrix(Y)) { + Y <- as.vector(Y) + } + if (nrow(X) != length(Y)) { + stop('Rows of X and number of Y elements are not compatible.') + } + if (ncol(X) < 2) { + stop('X is one dimensional, no need for dimension reduction.') + } + + if (!missing(k)) { + min.dim <- as.integer(k) + max.dim <- as.integer(k) + } else { + min.dim <- as.integer(min.dim) + max.dim <- as.integer(min(max.dim, ncol(X) - 1L)) + } + if (min.dim > max.dim) { + stop('`min.dim` bigger `max.dim`.') + } + if (max.dim >= ncol(X)) { + stop('`max.dim` must be smaller than `ncol(X)`.') + } + + # Call specified method. + method <- tolower(method) + call <- match.call() + dr <- list() + for (k in min.dim:max.dim) { + if (method == 'simple') { + dr.k <- cve_simple(X, Y, k, nObs = nObs, ...) + } else if (method == 'linesearch') { + dr.k <- cve_linesearch(X, Y, k, nObs = nObs, ...) + } else if (method == 'sgd') { + dr.k <- cve_sgd(X, Y, k, nObs = nObs, ...) + } else { + stop('Got unknown method.') + } + dr.k$k <- k + class(dr.k) <- "cve.k" + dr[[k]] <- dr.k + } + + # augment result information + dr$method <- method + dr$call <- call + class(dr) <- "cve" + return(dr) +} + +# TODO: write summary +# summary.cve <- function() { +# # code # +# } + +#' Ploting helper for objects of class \code{cve}. +#' +#' @param x Object of class \code{cve} (result of [cve()]). +#' @param content Specifies what to plot: +#' \itemize{ +#' \item "history" Plots the loss history from stiefel optimization +#' (default). +#' \item ... TODO: add (if there are any) +#' } +#' @param ... Pass through parameters to [plot()] and [lines()] +#' +#' @usage ## S3 method for class 'cve' +#' plot(x, content = "history", ...) +#' @seealso see \code{\link{par}} for graphical parameters to pass through +#' as well as \code{\link{plot}} for standard plot utility. +#' @importFrom graphics plot lines points +#' @method plot cve +#' @export +plot.cve <- function(x, ...) 
{ + + + # H <- x$history + # H_1 <- H[!is.na(H[, 1]), 1] + + # defaults <- list( + # main = "History", + # xlab = "Iterations i", + # ylab = expression(loss == L[n](V^{(i)})), + # xlim = c(1, nrow(H)), + # ylim = c(0, max(H)), + # type = "l" + # ) + + # call.plot <- match.call() + # keys <- names(defaults) + # keys <- keys[match(keys, names(call.plot)[-1], nomatch = 0) == 0] + + # for (key in keys) { + # call.plot[[key]] <- defaults[[key]] + # } + + # call.plot[[1L]] <- quote(plot) + # call.plot$x <- quote(1:length(H_1)) + # call.plot$y <- quote(H_1) + + # eval(call.plot) + + # if (ncol(H) > 1) { + # for (i in 2:ncol(H)) { + # H_i <- H[H[, i] > 0, i] + # lines(1:length(H_i), H_i) + # } + # } + # x.ends <- apply(H, 2, function(h) { length(h[!is.na(h)]) }) + # y.ends <- apply(H, 2, function(h) { tail(h[!is.na(h)], n=1) }) + # points(x.ends, y.ends) +} + +#' Prints a summary of a \code{cve} result. +#' @param object Instance of 'cve' as return of \code{cve}. +#' @method summary cve +#' @export +summary.cve <- function(object, ...) { + cat('Summary of CVE result - Method: "', object$method, '"\n', + '\n', + 'Dataset size: ', nrow(object$X), '\n', + 'Data Dimension: ', ncol(object$X), '\n', + 'SDR Dimension: ', object$k, '\n', + 'loss: ', object$loss, '\n', + '\n', + 'Called via:\n', + ' ', + sep='') + print(object$call) +} diff --git a/CVE_C/R/cve_linesearch.R b/CVE_C/R/cve_linesearch.R new file mode 100644 index 0000000..d2eb115 --- /dev/null +++ b/CVE_C/R/cve_linesearch.R @@ -0,0 +1,169 @@ +#' Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe +#' conditions. +#' +#' @keywords internal +#' @export +cve_linesearch <- function(X, Y, k, + nObs = sqrt(nrow(X)), + h = NULL, + tau = 1.0, + tol = 1e-3, + rho1 = 0.1, + rho2 = 0.9, + slack = 0, + epochs = 50L, + attempts = 10L, + max.linesearch.iter = 10L, + logger = NULL +) { + # Set `grad` functions environment to enable if to find this environments + # local variabels, needed to enable the manipulation of this local variables + # from within `grad`. + environment(grad) <- environment() + + # Get dimensions. + n <- nrow(X) + p <- ncol(X) + q <- p - k + + # Save initial learning rate `tau`. + tau.init <- tau + # Addapt tolearance for break condition. + tol <- sqrt(2 * q) * tol + + # Estaimate bandwidth if not given. + if (missing(h) | !is.numeric(h)) { + h <- estimate.bandwidth(X, k, nObs) + } + + # Compute persistent data. + # Compute lookup indexes for symmetrie, lower/upper + # triangular parts and vectorization. + pair.index <- elem.pairs(seq(n)) + i <- pair.index[1, ] # `i` indices of `(i, j)` pairs + j <- pair.index[2, ] # `j` indices of `(i, j)` pairs + # Matrix of vectorized indices. (vec(index) -> seq) + index <- matrix(seq(n * n), n, n) + lower <- index[lower.tri(index)] + upper <- t(index)[lower] + + # Create all pairewise differences of rows of `X`. + X_diff <- X[i, , drop = F] - X[j, , drop = F] + # Identity matrix. + I_p <- diag(1, p) + + # Init tracking of current best (according multiple attempts). + V.best <- NULL + loss.best <- Inf + + # Start loop for multiple attempts. + for (attempt in 1:attempts) { + + # Sample a `(p, q)` dimensional matrix from the stiefel manifold as + # optimization start value. + V <- rStiefl(p, q) + + # Initial loss and gradient. + loss <- Inf + G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE) + # Set last loss (aka, loss after applying the step). + loss.last <- loss + + # Call logger with initial values before starting optimization. 
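        # Reviewer sketch (not part of the original sources): `logger`, when
        # supplied, is called with the optimizer's current frame, so a caller
        # can trace the optimization without touching the internals, e.g.
        #
        #   log.hist <- function(env) cat(sprintf("epoch %3d: loss = %g\n",
        #                                         env$epoch, env$loss))
        #   cve_linesearch(X, Y, k, logger = log.hist)
        #
        # (`log.hist`, `X`, `Y` and `k` are placeholder names; any function
        # taking a single environment argument works as a logger.)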
+ if (is.function(logger)) { + epoch <- 0 # Set epoch count to 0 (only relevant for logging). + error <- NA + logger(environment()) + } + + ## Start optimization loop. + for (epoch in 1:epochs) { + + # Cayley transform matrix `A` + A <- (G %*% t(V)) - (V %*% t(G)) + + # Directional derivative of the loss at current position, given + # as `Tr(G^T \cdot A \cdot V)`. + loss.prime <- -0.5 * norm(A, type = 'F')^2 + + # Linesearch + tau.upper <- Inf + tau.lower <- 0 + tau <- tau.init + for (iter in 1:max.linesearch.iter) { + # Apply learning rate `tau`. + A.tau <- (tau / 2) * A + # Parallet transport (on Stiefl manifold) into direction of `G`. + inv <- solve(I_p + A.tau) + V.tau <- inv %*% ((I_p - A.tau) %*% V) + + # Loss at position after a step. + loss <- Inf # aka loss.tau + G.tau <- grad(X, Y, V.tau, h, loss.out = TRUE, persistent = TRUE) + + # Armijo condition. + if (loss > loss.last + (rho1 * tau * loss.prime)) { + tau.upper <- tau + tau <- (tau.lower + tau.upper) / 2 + next() + } + + V.prime.tau <- -0.5 * inv %*% A %*% (V + V.tau) + loss.prime.tau <- sum(G * V.prime.tau) # Tr(grad(tau)^T \cdot Y^'(tau)) + + # Wolfe condition. + if (loss.prime.tau < rho2 * loss.prime) { + tau.lower <- tau + if (tau.upper == Inf) { + tau <- 2 * tau.lower + } else { + tau <- (tau.lower + tau.upper) / 2 + } + } else { + break() + } + } + + # Compute error. + error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F") + + # Check break condition (epoch check to skip ignored gradient calc). + # Note: the devision by `sqrt(2 * k)` is included in `tol`. + if (error < tol | epoch >= epochs) { + # take last step and stop optimization. + V <- V.tau + # Final call to the logger before stopping optimization + if (is.function(logger)) { + G <- G.tau + logger(environment()) + } + break() + } + + # Perform the step and remember previous loss. + V <- V.tau + loss.last <- loss + G <- G.tau + + # Log after taking current step. + if (is.function(logger)) { + logger(environment()) + } + + } + + # Check if current attempt improved previous ones + if (loss < loss.best) { + loss.best <- loss + V.best <- V + } + + } + + return(list( + loss = loss.best, + V = V.best, + B = null(V.best), + h = h + )) +} diff --git a/CVE_C/R/cve_sgd.R b/CVE_C/R/cve_sgd.R new file mode 100644 index 0000000..3fee47f --- /dev/null +++ b/CVE_C/R/cve_sgd.R @@ -0,0 +1,129 @@ +#' Simple implementation of the CVE method. 'Simple' means that this method is +#' a classic GD method unsing no further tricks. +#' +#' @keywords internal +#' @export +cve_sgd <- function(X, Y, k, + nObs = sqrt(nrow(X)), + h = NULL, + tau = 0.01, + tol = 1e-3, + epochs = 50L, + batch.size = 16L, + attempts = 10L, + logger = NULL +) { + # Set `grad` functions environment to enable if to find this environments + # local variabels, needed to enable the manipulation of this local variables + # from within `grad`. + environment(grad) <- environment() + + # Get dimensions. + n <- nrow(X) # Number of samples. + p <- ncol(X) # Data dimensions + q <- p - k # Complement dimension of the SDR space. + + # Save initial learning rate `tau`. + tau.init <- tau + # Addapt tolearance for break condition. + tol <- sqrt(2 * q) * tol + + # Estaimate bandwidth if not given. + if (missing(h) || !is.numeric(h)) { + h <- estimate.bandwidth(X, k, nObs) + } + + # Compute persistent data. + # Compute lookup indexes for symmetrie, lower/upper + # triangular parts and vectorization. 
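    # Reviewer sketch (assuming the package is loaded): the arithmetic
    # `lower`/`upper` indices built below match the `lower.tri()` based
    # construction used in cve_linesearch.R, e.g. for n = 5:
    #
    #   n <- 5
    #   pairs <- elem.pairs(seq(n))
    #   i <- pairs[1, ]; j <- pairs[2, ]        # i < j for every pair
    #   lower <- ((i - 1) * n) + j              # position of (j, i), strict lower
    #   upper <- ((j - 1) * n) + i              # position of (i, j), strict upper
    #   index <- matrix(seq(n * n), n, n)
    #   stopifnot(all(lower == index[lower.tri(index)]),
    #             all(upper == t(index)[index[lower.tri(index)]]))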
+ pair.index <- elem.pairs(seq(n)) + i <- pair.index[1, ] # `i` indices of `(i, j)` pairs + j <- pair.index[2, ] # `j` indices of `(i, j)` pairs + # Index of vectorized matrix, for lower and upper triangular part. + lower <- ((i - 1) * n) + j + upper <- ((j - 1) * n) + i + + # Create all pairewise differences of rows of `X`. + X_diff <- X[i, , drop = F] - X[j, , drop = F] + # Identity matrix. + I_p <- diag(1, p) + # Init a list of data indices (shuffled for batching). + indices <- seq(n) + + # Init tracking of current best (according multiple attempts). + V.best <- NULL + loss.best <- Inf + + # Start loop for multiple attempts. + for (attempt in 1:attempts) { + # Reset learning rate `tau`. + tau <- tau.init + + # Sample a `(p, q)` dimensional matrix from the stiefel manifold as + # optimization start value. + V <- rStiefl(p, q) + # Keep track of last `V` for computing error after an epoch. + V.last <- V + + if (is.function(logger)) { + loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE) + error <- NA + epoch <- 0 + logger(environment()) + } + + # Repeat `epochs` times + for (epoch in 1:epochs) { + # Shuffle batches + batch.shuffle <- sample(indices) + + # Make a step for each batch. + for (batch.start in seq(1, n, batch.size)) { + # Select batch data indices. + batch.end <- min(batch.start + batch.size - 1, length(batch.shuffle)) + batch <- batch.shuffle[batch.start:batch.end] + + # Compute batch gradient. + loss <- NULL + G <- grad(X[batch, ], Y[batch], V, h, loss.out = TRUE) + + # Cayley transform matrix. + A <- (G %*% t(V)) - (V %*% t(G)) + + # Apply learning rate `tau`. + A.tau <- tau * A + # Parallet transport (on Stiefl manifold) into direction of `G`. + V <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V) + } + # And the error for the history. + error <- norm(V.last %*% t(V.last) - V %*% t(V), type = "F") + V.last <- V + + if (is.function(logger)) { + # Compute loss at end of epoch for logging. + loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE) + logger(environment()) + } + + # Check break condition. + if (error < tol) { + break() + } + } + # Compute actual loss after finishing for comparing multiple attempts. + loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE) + + # After each attempt, check if last attempt reached a better result. + if (loss < loss.best) { + loss.best <- loss + V.best <- V + } + } + + return(list( + loss = loss.best, + V = V.best, + B = null(V.best), + h = h + )) +} diff --git a/CVE_C/R/cve_simple.R b/CVE_C/R/cve_simple.R new file mode 100644 index 0000000..138751e --- /dev/null +++ b/CVE_C/R/cve_simple.R @@ -0,0 +1,141 @@ +#' Simple implementation of the CVE method. 'Simple' means that this method is +#' a classic GD method unsing no further tricks. +#' +#' @keywords internal +#' @export +cve_simple <- function(X, Y, k, + nObs = sqrt(nrow(X)), + h = NULL, + tau = 1.0, + tol = 1e-3, + slack = 0, + epochs = 50L, + attempts = 10L, + logger = NULL +) { + # Set `grad` functions environment to enable if to find this environments + # local variabels, needed to enable the manipulation of this local variables + # from within `grad`. + environment(grad) <- environment() + + # Get dimensions. + n <- nrow(X) # Number of samples. + p <- ncol(X) # Data dimensions + q <- p - k # Complement dimension of the SDR space. + + # Save initial learning rate `tau`. + tau.init <- tau + # Addapt tolearance for break condition. + tol <- sqrt(2 * q) * tol + + # Estaimate bandwidth if not given. 
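    # Reviewer sketch (assuming the package is loaded): the update used further
    # down, V.tau = solve(I + tau * A) %*% ((I - tau * A) %*% V) with the
    # skew-symmetric A = G V' - V G', is a Cayley transform, so V.tau stays on
    # the Stiefel manifold; the returned basis estimate is its orthogonal
    # complement null(V). A small check:
    #
    #   p <- 6; q <- 2; tau <- 0.1
    #   V <- rStiefl(p, q)
    #   G <- matrix(rnorm(p * q), p, q)          # stand-in for a gradient
    #   A <- (G %*% t(V)) - (V %*% t(G))
    #   V.tau <- solve(diag(1, p) + tau * A) %*% ((diag(1, p) - tau * A) %*% V)
    #   stopifnot(all.equal(t(V.tau) %*% V.tau, diag(1, q)),
    #             all.equal(crossprod(null(V.tau), V.tau), matrix(0, p - q, q)))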
+ if (missing(h) || !is.numeric(h)) { + h <- estimate.bandwidth(X, k, nObs) + } + + # Compute persistent data. + # Compute lookup indexes for symmetrie, lower/upper + # triangular parts and vectorization. + pair.index <- elem.pairs(seq(n)) + i <- pair.index[1, ] # `i` indices of `(i, j)` pairs + j <- pair.index[2, ] # `j` indices of `(i, j)` pairs + # Index of vectorized matrix, for lower and upper triangular part. + lower <- ((i - 1) * n) + j + upper <- ((j - 1) * n) + i + + # Create all pairewise differences of rows of `X`. + X_diff <- X[i, , drop = F] - X[j, , drop = F] + # Identity matrix. + I_p <- diag(1, p) + + # Init tracking of current best (according multiple attempts). + V.best <- NULL + loss.best <- Inf + + # Start loop for multiple attempts. + for (attempt in 1:attempts) { + # Reset learning rate `tau`. + tau <- tau.init + + # Sample a `(p, q)` dimensional matrix from the stiefel manifold as + # optimization start value. + V <- rStiefl(p, q) + + # Initial loss and gradient. + loss <- Inf + G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE) + # Set last loss (aka, loss after applying the step). + loss.last <- loss + + # Cayley transform matrix `A` + A <- (G %*% t(V)) - (V %*% t(G)) + + # Call logger with initial values before starting optimization. + if (is.function(logger)) { + epoch <- 0 # Set epoch count to 0 (only relevant for logging). + error <- NA + logger(environment()) + } + + ## Start optimization loop. + for (epoch in 1:epochs) { + # Apply learning rate `tau`. + A.tau <- tau * A + # Parallet transport (on Stiefl manifold) into direction of `G`. + V.tau <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V) + + # Loss at position after a step. + loss <- grad(X, Y, V.tau, h, loss.only = TRUE, persistent = TRUE) + + # Check if step is appropriate, iff not reduce learning rate. + if ((loss - loss.last) > slack * loss.last) { + tau <- tau / 2 + next() # Keep position and try with smaller `tau`. + } + + # Compute error. + error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F") + + # Check break condition (epoch check to skip ignored gradient calc). + # Note: the devision by `sqrt(2 * k)` is included in `tol`. + if (error < tol || epoch >= epochs) { + # take last step and stop optimization. + V <- V.tau + # Call logger last time befor stoping. + if (is.function(logger)) { + logger(environment()) + } + break() + } + + # Perform the step and remember previous loss. + V <- V.tau + loss.last <- loss + + # Call logger after taking a step. + if (is.function(logger)) { + logger(environment()) + } + + # Compute gradient at new position. + G <- grad(X, Y, V, h, persistent = TRUE) + + # Cayley transform matrix `A` + A <- (G %*% t(V)) - (V %*% t(G)) + } + + # Check if current attempt improved previous ones + if (loss < loss.best) { + loss.best <- loss + V.best <- V + } + + } + + return(list( + loss = loss.best, + V = V.best, + B = null(V.best), + h = h + )) +} diff --git a/CVE_C/R/datasets.R b/CVE_C/R/datasets.R new file mode 100644 index 0000000..68af0f4 --- /dev/null +++ b/CVE_C/R/datasets.R @@ -0,0 +1,109 @@ +#' Generates test datasets. +#' +#' Provides sample datasets. There are 5 different datasets named +#' M1, M2, M3, M4 and M5 describet in the paper references below. +#' The general model is given by: +#' \deqn{Y ~ g(B'X) + \epsilon} +#' +#' @param name One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"} or \code{"M5"} +#' @param n nr samples +#' @param p Dim. of random variable \code{X}. +#' @param p.mix Only for \code{"M4"}, see: below. 
+#' @param lambda Only for \code{"M4"}, see: below. +#' +#' @return List with elements +#' \itemize{ +#' \item{X}{data} +#' \item{Y}{response} +#' \item{B}{Used dim-reduction matrix} +#' \item{name}{Name of the dataset (name parameter)} +#' } +#' +#' @section M1: +#' The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace +#' dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points. +#' The link function \eqn{g} is given as +#' \deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + 0.5\epsilon}{g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + 0.5 epsilon} +#' @section M2: +#' \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a default of \eqn{n = 200} data points. +#' The link function \eqn{g} is given as +#' \deqn{g(x) = x_1 x_2^2 + 0.5\epsilon}{g(x) = x_1 x_2^2 + 0.5 epsilon} +#' @section M3: +#' TODO: +#' @section M4: +#' TODO: +#' @section M5: +#' TODO: +#' +#' @import stats +#' @importFrom stats rnorm rbinom +#' @export +dataset <- function(name = "M1", n, B, p.mix = 0.3, lambda = 1.0) { + # validate parameters + stopifnot(name %in% c("M1", "M2", "M3", "M4", "M5")) + + # set default values if not supplied + if (missing(n)) { + n <- if (name %in% c("M1", "M2")) 200 else if (name != "M5") 100 else 42 + } + if (missing(B)) { + p <- 12 + if (name == "M1") { + B <- cbind( + c( 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0), + c( 1,-1, 1,-1, 1,-1, 0, 0, 0, 0, 0, 0) + ) / sqrt(6) + } else if (name == "M2") { + B <- cbind( + c(c(1, 0), rep(0, 10)), + c(c(0, 1), rep(0, 10)) + ) + } else { + B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, 6)), 12, 1) + } + } else { + p <- dim(B)[1] + # validate col. nr to match dataset `k = dim(B)[2]` + stopifnot( + name %in% c("M1", "M2") && dim(B)[2] == 2, + name %in% c("M3", "M4", "M5") && dim(B)[2] == 1 + ) + } + + # set link function `g` for model `Y ~ g(B'X) + epsilon` + if (name == "M1") { + g <- function(BX) { BX[1] / (0.5 + (BX[2] + 1.5)^2) } + } else if (name == "M2") { + g <- function(BX) { BX[1] * BX[2]^2 } + } else if (name %in% c("M3", "M4")) { + g <- function(BX) { cos(BX[1]) } + } else { # name == "M5" + g <- function(BX) { 2 * log(abs(BX[1]) + 1) } + } + + # compute X + if (name != "M4") { + # compute root of the covariance matrix according the dataset + if (name %in% c("M1", "M3")) { + # Variance-Covariance structure for `X ~ N_p(0, \Sigma)` with + # `\Sigma_{i, j} = 0.5^{|i - j|}`. + Sigma <- matrix(0.5^abs(kronecker(1:p, 1:p, '-')), p, p) + # decompose Sigma to Sigma.root^T Sigma.root = Sigma for usage in creation of `X` + Sigma.root <- chol(Sigma) + } else { # name %in% c("M2", "M5") + Sigma.root <- diag(rep(1, p)) # d-dim identity + } + # data `X` as multivariate random normal variable with + # variance matrix `Sigma`. + X <- replicate(p, rnorm(n, 0, 1)) %*% Sigma.root + } else { # name == "M4" + X <- t(replicate(100, rep((1 - 2 * rbinom(1, 1, p.mix)) * lambda, p) + rnorm(p, 0, 1))) + } + + # responce `y ~ g(B'X) + epsilon` with `epsilon ~ N(0, 1 / 2)` + Y <- apply(X, 1, function(X_i) { + g(t(B) %*% X_i) + rnorm(1, 0, 0.5) + }) + + return(list(X = X, Y = Y, B = B, name = name)) +} diff --git a/CVE_C/R/estimateBandwidth.R b/CVE_C/R/estimateBandwidth.R new file mode 100644 index 0000000..8e7edc7 --- /dev/null +++ b/CVE_C/R/estimateBandwidth.R @@ -0,0 +1,27 @@ +#' Estimated bandwidth for CVE. 
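# Reviewer sketch (assuming the package is loaded): a quick shape check of the
# generator defined in datasets.R above; "M1" defaults to n = 200 observations,
# p = 12 predictors and a rank-2 reduction matrix B.
#
#   ds <- dataset("M1")
#   stopifnot(nrow(ds$X) == 200, ncol(ds$X) == 12,
#             ncol(ds$B) == 2, length(ds$Y) == nrow(ds$X))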
+#' +#' Estimates a propper bandwidth \code{h} according +#' \deqn{% +#' h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{% +#' h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p} +#' +#' @param X data matrix of dimension (n x p) with n data points X_i of dimension +#' q. Therefor each row represents a datapoint of dimension p. +#' @param k Guess for rank(B). +#' @param nObs Ether numeric of a function. If specified as numeric value +#' its used in the computation of the bandwidth directly. If its a function +#' `nObs` is evaluated as \code{nObs(nrow(x))}. The default behaviou if not +#' supplied at all is to use \code{nObs <- nrow(x)^0.5}. +#' +#' @seealso [\code{\link{qchisq}}] +#' @export +estimate.bandwidth <- function(X, k, nObs) { + n <- nrow(X) + p <- ncol(X) + + X_centered <- scale(X, center=TRUE, scale=FALSE) + Sigma <- (1 / n) * t(X_centered) %*% X_centered + + quantil <- qchisq((nObs - 1) / (n - 1), k) + return(2 * quantil * sum(diag(Sigma)) / p) +} diff --git a/CVE_C/R/gradient.R b/CVE_C/R/gradient.R new file mode 100644 index 0000000..8baaa88 --- /dev/null +++ b/CVE_C/R/gradient.R @@ -0,0 +1,48 @@ +#' Compute get gradient of `L(V)` given a dataset `X`. +#' +#' @param X Data matrix. +#' @param Y Responce. +#' @param V Position to compute the gradient at, aka point on Stiefl manifold. +#' @param h Bandwidth +#' @param loss.out Iff \code{TRUE} loss will be written to parent environment. +#' @param loss.only Boolean to only compute the loss, of \code{TRUE} a single +#' value loss is returned and \code{envir} is ignored. +#' @param persistent Determines if data indices and dependent calculations shall +#' be reused from the parent environment. ATTENTION: Do NOT set this flag, only +#' intended for internal usage by carefully aligned functions! +#' @keywords internal +#' @export +grad <- function(X, Y, V, h, + loss.out = FALSE, + loss.only = FALSE, + persistent = FALSE) { + # Get number of samples and dimension. + n <- nrow(X) + p <- ncol(X) + + if (!persistent) { + # Compute lookup indexes for symmetrie, lower/upper + # triangular parts and vectorization. + pair.index <- elem.pairs(seq(n)) + i <- pair.index[1, ] # `i` indices of `(i, j)` pairs + j <- pair.index[2, ] # `j` indices of `(i, j)` pairs + # Index of vectorized matrix, for lower and upper triangular part. + lower <- ((i - 1) * n) + j + upper <- ((j - 1) * n) + i + + # Create all pairewise differences of rows of `X`. + X_diff <- X[i, , drop = F] - X[j, , drop = F] + } + + out <- .Call("grad_c", PACKAGE = "CVE", + X, X_diff, as.double(Y), V, as.double(h)); + + if (loss.only) { + return(out$loss) + } + if (loss.out) { + loss <<- out$loss + } + + return(out$G) +} diff --git a/CVE_C/R/gridSearch.R b/CVE_C/R/gridSearch.R new file mode 100644 index 0000000..2b2d5e7 --- /dev/null +++ b/CVE_C/R/gridSearch.R @@ -0,0 +1,43 @@ + +#' Performs a grid search for parameters over a parameter grid. 
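# Reviewer sketch (hedged; assuming the package is loaded): a pure R reference
# of the quantity the C kernel behind grad() evaluates. With the projection
# Q = I - V V', Gaussian weights on the projected pairwise distances and
# weighted first/second moments of Y, the loss is a mean conditional variance;
# it should agree with grad(..., loss.only = TRUE) up to floating point.
# The name `loss.reference` is made up for this sketch.
#
#   loss.reference <- function(X, Y, V, h) {
#       Q  <- diag(1, ncol(X)) - tcrossprod(V)   # projection onto span(V)^perp
#       D2 <- as.matrix(dist(X %*% Q))^2         # squared projected distances
#       W  <- exp(-0.5 * D2 / h)                 # Gaussian weights, diag(W) = 1
#       y1 <- as.vector(W %*% Y)   / rowSums(W)  # weighted first moments
#       y2 <- as.vector(W %*% Y^2) / rowSums(W)  # weighted second moments
#       mean(y2 - y1^2)
#   }
#   ds <- dataset("M2")
#   V <- rStiefl(ncol(ds$X), ncol(ds$X) - 2)
#   h <- estimate.bandwidth(ds$X, 2, sqrt(nrow(ds$X)))
#   all.equal(loss.reference(ds$X, ds$Y, V, h),
#             grad(ds$X, ds$Y, V, h, loss.only = TRUE))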
+#' @examples +#' args <- list( +#' h = c(0.05, 0.1, 0.2), +#' method = c("simple", "sgd"), +#' tau = c(0.5, 0.1, 0.01) +#' ) +#' cve.grid.search(args) +#' @export +cve.grid.search <- function(X, Y, k, args) { + + args$stringsAsFactors = FALSE + args$KEEP.OUT.ATTRS = FALSE + grid <- do.call(expand.grid, args) + grid.length <- length(grid[[1]]) + + print(grid) + + for (i in 1:grid.length) { + arguments <- as.list(grid[i, ]) + # Set required arguments + arguments$X <- X + arguments$Y <- Y + arguments$k <- k + # print(arguments) + dr <- do.call(cve.call, arguments) + print(dr$loss) + } +} + +# ds <- dataset() +# X <- ds$X +# Y <- ds$Y +# (k <- ncol(ds$B)) +# args <- list( +# h = c(0.05, 0.1, 0.2), +# method = c("simple", "sgd"), +# tau = c(0.5, 0.1, 0.01), +# attempts = c(1L) +# ) + +# cve.grid.search(X, Y, k, args) \ No newline at end of file diff --git a/CVE_C/R/util.R b/CVE_C/R/util.R new file mode 100644 index 0000000..ca7606a --- /dev/null +++ b/CVE_C/R/util.R @@ -0,0 +1,40 @@ +#' Samples uniform from the Stiefel Manifold +#' +#' @param p row dim. +#' @param q col dim. +#' @return `(p, q)` semi-orthogonal matrix +#' @examples +#' V <- rStiefel(6, 4) +#' @export +rStiefl <- function(p, q) { + return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q)))) +} + +#' Null space basis of given matrix `V` +#' +#' @param V `(p, q)` matrix +#' @return Semi-orthogonal `(p, p - q)` matrix spaning the null space of `V`. +#' @keywords internal +#' @export +null <- function(V) { + tmp <- qr(V) + set <- if(tmp$rank == 0L) seq_len(ncol(V)) else -seq_len(tmp$rank) + return(qr.Q(tmp, complete=TRUE)[, set, drop=FALSE]) +} + +#' Creates a (numeric) matrix where each column contains +#' an element to element matching. +#' @param elements numeric vector of elements to match +#' @return matrix of size `(2, n * (n - 1) / 2)` for a argument of lenght `n`. +#' @keywords internal +#' @examples +#' elem.pairs(seq.int(2, 5)) +#' @export +elem.pairs <- function(elements) { + # Number of elements to match. + n <- length(elements) + # Create all combinations. + pairs <- rbind(rep(elements, each=n), rep(elements, n)) + # Select unique combinations without self interaction. + return(pairs[, pairs[1, ] < pairs[2, ]]) +} diff --git a/CVE_C/inst/doc/CVE_paper.pdf b/CVE_C/inst/doc/CVE_paper.pdf new file mode 100755 index 0000000..e870293 Binary files /dev/null and b/CVE_C/inst/doc/CVE_paper.pdf differ diff --git a/CVE_C/man/CVE-package.Rd b/CVE_C/man/CVE-package.Rd new file mode 100644 index 0000000..8bb0f5d --- /dev/null +++ b/CVE_C/man/CVE-package.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CVE.R +\docType{package} +\name{CVE-package} +\alias{CVE} +\alias{CVE-package} +\title{Conditional Variance Estimator (CVE)} +\description{ +Conditional Variance Estimator for Sufficient Dimension + Reduction +} +\details{ +TODO: And some details +} +\references{ +Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019 +} +\author{ +Loki +} diff --git a/CVE_C/man/cve.Rd b/CVE_C/man/cve.Rd new file mode 100644 index 0000000..d3e0d05 --- /dev/null +++ b/CVE_C/man/cve.Rd @@ -0,0 +1,71 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CVE.R +\name{cve} +\alias{cve} +\alias{cve.call} +\title{Implementation of the CVE method.} +\usage{ +cve(formula, data, method = "simple", max.dim = 10, ...) + +cve.call(X, Y, method = "simple", nObs = nrow(X)^0.5, min.dim = 1, + max.dim = 10, k, ...) 
+} +\arguments{ +\item{formula}{Formel for the regression model defining `X`, `Y`. +See: \code{\link{formula}}.} + +\item{data}{data.frame holding data for formula.} + +\item{method}{The different only differe in the used optimization. + All of them are Gradient based optimization on a Stiefel manifold. +\itemize{ + \item "simple" Simple reduction of stepsize. + \item "sgd" stocastic gradient decent. + \item TODO: further +}} + +\item{...}{Further parameters depending on the used method.} + +\item{X}{Data} + +\item{Y}{Responces} + +\item{nObs}{as describet in the Paper.} + +\item{k}{guess for SDR dimension.} + +\item{nObs}{Like in the paper.} + +\item{...}{Method specific parameters.} +} +\description{ +Conditional Variance Estimator (CVE) is a novel sufficient dimension +reduction (SDR) method assuming a model +\deqn{Y \sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon} +where B'X is a lower dimensional projection of the predictors. +} +\examples{ +library(CVE) + +# sample dataset +ds <- dataset("M5") + +# call ´cve´ with default method (aka "simple") +dr.simple <- cve(ds$Y ~ ds$X, k = ncol(ds$B)) +# plot optimization history (loss via iteration) +plot(dr.simple, main = "CVE M5 simple") + +# call ´cve´ with method "linesearch" using ´data.frame´ as data. +data <- data.frame(Y = ds$Y, X = ds$X) +# Note: ´Y, X´ are NOT defined, they are extracted from ´data´. +dr.linesearch <- cve(Y ~ ., data, method = "linesearch", k = ncol(ds$B)) +plot(dr.linesearch, main = "CVE M5 linesearch") + +} +\references{ +Fertl L., Bura E. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019 +} +\seealso{ +\code{\link{formula}}. For a complete parameters list (dependent on + the method) see \code{\link{cve_simple}}, \code{\link{cve_sgd}} +} diff --git a/CVE_C/man/cve.grid.search.Rd b/CVE_C/man/cve.grid.search.Rd new file mode 100644 index 0000000..2e7ad79 --- /dev/null +++ b/CVE_C/man/cve.grid.search.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gridSearch.R +\name{cve.grid.search} +\alias{cve.grid.search} +\title{Performs a grid search for parameters over a parameter grid.} +\usage{ +cve.grid.search(X, Y, k, args) +} +\description{ +Performs a grid search for parameters over a parameter grid. +} +\examples{ +args <- list( + h = c(0.05, 0.1, 0.2), + method = c("simple", "sgd"), + tau = c(0.5, 0.1, 0.01) +) +cve.grid.search(args) +} diff --git a/CVE_C/man/cve_linesearch.Rd b/CVE_C/man/cve_linesearch.Rd new file mode 100644 index 0000000..0413346 --- /dev/null +++ b/CVE_C/man/cve_linesearch.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cve_linesearch.R +\name{cve_linesearch} +\alias{cve_linesearch} +\title{Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe +conditions.} +\usage{ +cve_linesearch(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 1, + tol = 0.001, rho1 = 0.1, rho2 = 0.9, slack = 0, epochs = 50L, + attempts = 10L, max.linesearch.iter = 10L, logger = NULL) +} +\description{ +Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe +conditions. +} +\keyword{internal} diff --git a/CVE_C/man/cve_sgd.Rd b/CVE_C/man/cve_sgd.Rd new file mode 100644 index 0000000..fc00f7d --- /dev/null +++ b/CVE_C/man/cve_sgd.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cve_sgd.R +\name{cve_sgd} +\alias{cve_sgd} +\title{Simple implementation of the CVE method. 
'Simple' means that this method is +a classic GD method unsing no further tricks.} +\usage{ +cve_sgd(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 0.01, + tol = 0.001, epochs = 50L, batch.size = 16L, attempts = 10L, + logger = NULL) +} +\description{ +Simple implementation of the CVE method. 'Simple' means that this method is +a classic GD method unsing no further tricks. +} +\keyword{internal} diff --git a/CVE_C/man/cve_simple.Rd b/CVE_C/man/cve_simple.Rd new file mode 100644 index 0000000..67d185f --- /dev/null +++ b/CVE_C/man/cve_simple.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cve_simple.R +\name{cve_simple} +\alias{cve_simple} +\title{Simple implementation of the CVE method. 'Simple' means that this method is +a classic GD method unsing no further tricks.} +\usage{ +cve_simple(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 1, + tol = 0.001, slack = 0, epochs = 50L, attempts = 10L, + logger = NULL) +} +\description{ +Simple implementation of the CVE method. 'Simple' means that this method is +a classic GD method unsing no further tricks. +} +\keyword{internal} diff --git a/CVE_C/man/dataset.Rd b/CVE_C/man/dataset.Rd new file mode 100644 index 0000000..da7be5b --- /dev/null +++ b/CVE_C/man/dataset.Rd @@ -0,0 +1,64 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/datasets.R +\name{dataset} +\alias{dataset} +\title{Generates test datasets.} +\usage{ +dataset(name = "M1", n, B, p.mix = 0.3, lambda = 1) +} +\arguments{ +\item{name}{One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"} or \code{"M5"}} + +\item{n}{nr samples} + +\item{p.mix}{Only for \code{"M4"}, see: below.} + +\item{lambda}{Only for \code{"M4"}, see: below.} + +\item{p}{Dim. of random variable \code{X}.} +} +\value{ +List with elements +\itemize{ + \item{X}{data} + \item{Y}{response} + \item{B}{Used dim-reduction matrix} + \item{name}{Name of the dataset (name parameter)} +} +} +\description{ +Provides sample datasets. There are 5 different datasets named +M1, M2, M3, M4 and M5 describet in the paper references below. +The general model is given by: +\deqn{Y ~ g(B'X) + \epsilon} +} +\section{M1}{ + +The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace +dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points. +The link function \eqn{g} is given as +\deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + 0.5\epsilon}{g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + 0.5 epsilon} +} + +\section{M2}{ + +\eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a default of \eqn{n = 200} data points. +The link function \eqn{g} is given as +\deqn{g(x) = x_1 x_2^2 + 0.5\epsilon}{g(x) = x_1 x_2^2 + 0.5 epsilon} +} + +\section{M3}{ + +TODO: +} + +\section{M4}{ + +TODO: +} + +\section{M5}{ + +TODO: +} + diff --git a/CVE_C/man/elem.pairs.Rd b/CVE_C/man/elem.pairs.Rd new file mode 100644 index 0000000..2d36c5d --- /dev/null +++ b/CVE_C/man/elem.pairs.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/util.R +\name{elem.pairs} +\alias{elem.pairs} +\title{Creates a (numeric) matrix where each column contains +an element to element matching.} +\usage{ +elem.pairs(elements) +} +\arguments{ +\item{elements}{numeric vector of elements to match} +} +\value{ +matrix of size `(2, n * (n - 1) / 2)` for a argument of lenght `n`. +} +\description{ +Creates a (numeric) matrix where each column contains +an element to element matching. 
+} +\examples{ + elem.pairs(seq.int(2, 5)) +} +\keyword{internal} diff --git a/CVE_C/man/estimate.bandwidth.Rd b/CVE_C/man/estimate.bandwidth.Rd new file mode 100644 index 0000000..4b2ae94 --- /dev/null +++ b/CVE_C/man/estimate.bandwidth.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/estimateBandwidth.R +\name{estimate.bandwidth} +\alias{estimate.bandwidth} +\title{Estimated bandwidth for CVE.} +\usage{ +estimate.bandwidth(X, k, nObs) +} +\arguments{ +\item{X}{data matrix of dimension (n x p) with n data points X_i of dimension +q. Therefor each row represents a datapoint of dimension p.} + +\item{k}{Guess for rank(B).} + +\item{nObs}{Ether numeric of a function. If specified as numeric value +its used in the computation of the bandwidth directly. If its a function +`nObs` is evaluated as \code{nObs(nrow(x))}. The default behaviou if not +supplied at all is to use \code{nObs <- nrow(x)^0.5}.} +} +\description{ +Estimates a propper bandwidth \code{h} according +\deqn{% +h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{% +h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p} +} +\seealso{ +[\code{\link{qchisq}}] +} diff --git a/CVE_C/man/grad.Rd b/CVE_C/man/grad.Rd new file mode 100644 index 0000000..d890e93 --- /dev/null +++ b/CVE_C/man/grad.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gradient.R +\name{grad} +\alias{grad} +\title{Compute get gradient of `L(V)` given a dataset `X`.} +\usage{ +grad(X, Y, V, h, loss.out = FALSE, loss.only = FALSE, + persistent = FALSE) +} +\arguments{ +\item{X}{Data matrix.} + +\item{Y}{Responce.} + +\item{V}{Position to compute the gradient at, aka point on Stiefl manifold.} + +\item{h}{Bandwidth} + +\item{loss.out}{Iff \code{TRUE} loss will be written to parent environment.} + +\item{loss.only}{Boolean to only compute the loss, of \code{TRUE} a single +value loss is returned and \code{envir} is ignored.} + +\item{persistent}{Determines if data indices and dependent calculations shall +be reused from the parent environment. ATTENTION: Do NOT set this flag, only +intended for internal usage by carefully aligned functions!} +} +\description{ +Compute get gradient of `L(V)` given a dataset `X`. +} +\keyword{internal} diff --git a/CVE_C/man/null.Rd b/CVE_C/man/null.Rd new file mode 100644 index 0000000..498b159 --- /dev/null +++ b/CVE_C/man/null.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/util.R +\name{null} +\alias{null} +\title{Null space basis of given matrix `V`} +\usage{ +null(V) +} +\arguments{ +\item{V}{`(p, q)` matrix} +} +\value{ +Semi-orthogonal `(p, p - q)` matrix spaning the null space of `V`. +} +\description{ +Null space basis of given matrix `V` +} +\keyword{internal} diff --git a/CVE_C/man/plot.cve.Rd b/CVE_C/man/plot.cve.Rd new file mode 100644 index 0000000..35f6921 --- /dev/null +++ b/CVE_C/man/plot.cve.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CVE.R +\name{plot.cve} +\alias{plot.cve} +\title{Ploting helper for objects of class \code{cve}.} +\usage{ +## S3 method for class 'cve' +plot(x, content = "history", ...) +} +\arguments{ +\item{x}{Object of class \code{cve} (result of [cve()]).} + +\item{...}{Pass through parameters to [plot()] and [lines()]} + +\item{content}{Specifies what to plot: +\itemize{ + \item "history" Plots the loss history from stiefel optimization + (default). + \item ... 
TODO: add (if there are any) +}} +} +\description{ +Ploting helper for objects of class \code{cve}. +} +\seealso{ +see \code{\link{par}} for graphical parameters to pass through + as well as \code{\link{plot}} for standard plot utility. +} diff --git a/CVE_C/man/rStiefl.Rd b/CVE_C/man/rStiefl.Rd new file mode 100644 index 0000000..31c40bb --- /dev/null +++ b/CVE_C/man/rStiefl.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/util.R +\name{rStiefl} +\alias{rStiefl} +\title{Samples uniform from the Stiefel Manifold} +\usage{ +rStiefl(p, q) +} +\arguments{ +\item{p}{row dim.} + +\item{q}{col dim.} +} +\value{ +`(p, q)` semi-orthogonal matrix +} +\description{ +Samples uniform from the Stiefel Manifold +} +\examples{ + V <- rStiefel(6, 4) +} diff --git a/CVE_C/man/summary.cve.Rd b/CVE_C/man/summary.cve.Rd new file mode 100644 index 0000000..1151f98 --- /dev/null +++ b/CVE_C/man/summary.cve.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CVE.R +\name{summary.cve} +\alias{summary.cve} +\title{Prints a summary of a \code{cve} result.} +\usage{ +\method{summary}{cve}(object, ...) +} +\arguments{ +\item{object}{Instance of 'cve' as return of \code{cve}.} +} +\description{ +Prints a summary of a \code{cve} result. +} diff --git a/CVE_C/src/Makevars b/CVE_C/src/Makevars new file mode 100644 index 0000000..d3e3f41 --- /dev/null +++ b/CVE_C/src/Makevars @@ -0,0 +1,14 @@ + +## With R 3.1.0 or later, you can uncomment the following line to tell R to +## enable compilation with C++11 (where available) +## +## Also, OpenMP support in Armadillo prefers C++11 support. However, for wider +## availability of the package we do not yet enforce this here. It is however +## recommended for client packages to set it. +## +## And with R 3.4.0, and RcppArmadillo 0.7.960.*, we turn C++11 on as OpenMP +## support within Armadillo prefers / requires it +CXX_STD = CXX11 + +PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) +PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) diff --git a/CVE_C/src/Makevars.win b/CVE_C/src/Makevars.win new file mode 100644 index 0000000..d3e3f41 --- /dev/null +++ b/CVE_C/src/Makevars.win @@ -0,0 +1,14 @@ + +## With R 3.1.0 or later, you can uncomment the following line to tell R to +## enable compilation with C++11 (where available) +## +## Also, OpenMP support in Armadillo prefers C++11 support. However, for wider +## availability of the package we do not yet enforce this here. It is however +## recommended for client packages to set it. 
+## +## And with R 3.4.0, and RcppArmadillo 0.7.960.*, we turn C++11 on as OpenMP +## support within Armadillo prefers / requires it +CXX_STD = CXX11 + +PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) +PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) diff --git a/CVE_C/src/config.h b/CVE_C/src/config.h new file mode 100644 index 0000000..33dc4c9 --- /dev/null +++ b/CVE_C/src/config.h @@ -0,0 +1,7 @@ +#ifndef CVE_INCLUDE_GUARD_CONFIG_ +#define CVE_INCLUDE_GUARD_CONFIG_ + +#define CVE_MEM_CHUNK_SIZE 2032 +#define CVE_MEM_CHUNK_SMALL 1016 + +#endif /* CVE_INCLUDE_GUARD_CONFIG_ */ \ No newline at end of file diff --git a/CVE_C/src/export.c b/CVE_C/src/export.c new file mode 100644 index 0000000..445abd0 --- /dev/null +++ b/CVE_C/src/export.c @@ -0,0 +1,29 @@ +#include + +void grad(const int n, const int p, const int q, + const double *X, + const double *X_diff, + const double *Y, + const double *V, + const double h, + double *G, double *loss); + +SEXP grad_c(SEXP X, SEXP X_diff, SEXP Y, SEXP V, SEXP h) { + SEXP G = PROTECT(allocMatrix(REALSXP, nrows(V), ncols(V))); + SEXP loss = PROTECT(ScalarReal(0.0)); + + grad(nrows(X), ncols(X), ncols(V), + REAL(X), REAL(X_diff), REAL(Y), REAL(V), *REAL(h), + REAL(G), REAL(loss)); + + SEXP out = PROTECT(allocVector(VECSXP, 2)); + SET_VECTOR_ELT(out, 0, G); + SET_VECTOR_ELT(out, 1, loss); + SEXP names = PROTECT(allocVector(STRSXP, 2)); + SET_STRING_ELT(names, 0, mkChar("G")); + SET_STRING_ELT(names, 1, mkChar("loss")); + setAttrib(out, install("names"), names); + + UNPROTECT(4); + return out; +} diff --git a/CVE_C/src/grad.c b/CVE_C/src/grad.c new file mode 100644 index 0000000..b90f297 --- /dev/null +++ b/CVE_C/src/grad.c @@ -0,0 +1,123 @@ +#include +#include + +#include "sums.h" +#include "sweep.h" +#include "matrix.h" +#include "indexing.h" + +// TODO: clarify +static inline double gaussKernel(const double x, const double scale) { + return exp(scale * x * x); +} + +// TODO: mutch potential for optimization!!! +static void weightedYandLoss(const int n, + const double *Y, + const double *vecD, + const double *vecW, + const double *colSums, + double *y1, double *L, double *vecS, + double *loss) { + int i, j, k, N = n * (n - 1) / 2; + double l; + + for (i = 0; i < n; ++i) { + y1[i] = Y[i] / colSums[i]; + L[i] = Y[i] * Y[i] / colSums[i]; + } + + for (k = j = 0; j < n; ++j) { + for (i = j + 1; i < n; ++i, ++k) { + y1[i] += Y[j] * vecW[k] / colSums[i]; + y1[j] += Y[i] * vecW[k] / colSums[j]; + L[i] += Y[j] * Y[j] * vecW[k] / colSums[i]; + L[j] += Y[i] * Y[i] * vecW[k] / colSums[j]; + } + } + + l = 0.0; + for (i = 0; i < n; ++i) { + l += (L[i] -= y1[i] * y1[i]); + } + *loss = l / (double)n; + + for (k = j = 0; j < n; ++j) { + for (i = j + 1; i < n; ++i, ++k) { + l = Y[j] - y1[i]; + vecS[k] = (L[i] - (l * l)) / colSums[i]; + l = Y[i] - y1[j]; + vecS[k] += (L[j] - (l * l)) / colSums[j]; + } + } + + for (k = 0; k < N; ++k) { + vecS[k] *= vecW[k] * vecD[k]; + } +} + +void grad(const int n, const int p, const int q, + const double *X, + const double *X_diff, + const double *Y, + const double *V, + const double h, + double *G, double *loss) { + // Number of X_i to X_j not trivial pairs. + int i, N = (n * (n - 1)) / 2; + double scale = -0.5 / h; + + if (X_diff == (void*)0) { + // TODO: ... + } + + // Allocate and compute projection matrix `Q = I_p - V * V^T` + double *Q = (double*)malloc(p * p * sizeof(double)); + nullProj(V, p, q, Q); + + // allocate and compute vectorized distance matrix with a temporary + // projection of `X_diff`. 
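    // Reviewer note (memory-layout sketch, not part of the original sources):
    // `X_proj` is deliberately over-allocated. After the projected distances
    // are formed its storage is reused for `vecW` (N doubles) and for
    // `colSums`, `y1` and `L` (n doubles each), and later again for the scaled
    // `X_diff` feeding the final cross product. The `N * 5` fallback for
    // p < 5 keeps the buffer large enough, since N + 3 * n <= 5 * N for all
    // n >= 3.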
+ double *vecD = (double*)malloc(N * sizeof(double)); + double *X_proj; + if (p < 5) { // TODO: refine that! + X_proj = (double*)malloc(N * 5 * sizeof(double)); + } else { + X_proj = (double*)malloc(N * p * sizeof(double)); + } + matrixprod(X_diff, N, p, Q, p, p, X_proj); + rowSquareSums(X_proj, N, p, vecD); + + // Apply kernel to distence vector for weights computation. + double *vecW = X_proj; // reuse memory area, no longer needed. + for (i = 0; i < N; ++i) { + vecW[i] = gaussKernel(vecD[i], scale); + } + double *colSums = X_proj + N; // still allocated! + rowSumsSymVec(vecW, n, 1.0, colSums); // rowSums = colSums cause Sym + + // compute weighted responces of first end second momontum, aka y1, y2. + double *y1 = X_proj + N + n; + double *L = X_proj + N + (2 * n); + // Allocate X_diff scaling vector `vecS`, not in `X_proj` mem area because + // used symultanious to X_proj in final gradient computation. + double *vecS = (double*)malloc(N * sizeof(double)); + weightedYandLoss(n, Y, vecD, vecW, colSums, y1, L, vecS, loss); + + // compute the gradient using X_proj for intermidiate scaled X_diff. + rowSweep(X_diff, N, p, "*", vecS, X_proj); + // reuse Q which has the required dim (p, p). + crossprod(X_diff, N, p, X_proj, N, p, Q); + // Product with V + matrixprod(Q, p, p, V, p, q, G); + // And final scaling (TODO: move into matrixprod!) + scale = -2.0 / (((double)n) * h * h); + N = p * q; + for (i = 0; i < N; ++i) { + G[i] *= scale; + } + + free(vecS); + free(X_proj); + free(vecD); + free(Q); +} diff --git a/CVE_C/src/indexing.c b/CVE_C/src/indexing.c new file mode 100644 index 0000000..36a91e7 --- /dev/null +++ b/CVE_C/src/indexing.c @@ -0,0 +1,12 @@ +#include "indexing.h" + +void rangePairs(const int from, const int to, int *pairs) { + int i, j, k; + + for (k = 0, i = from; i < to; ++i) { + for (j = i + 1; j < to; ++j, k += 2) { + pairs[k] = i; + pairs[k + 1] = j; + } + } +} diff --git a/CVE_C/src/indexing.h b/CVE_C/src/indexing.h new file mode 100644 index 0000000..db49581 --- /dev/null +++ b/CVE_C/src/indexing.h @@ -0,0 +1,8 @@ + +/* Include Guard */ +#ifndef CVE_INCLUDE_GUARD_INDEXING_ +#define CVE_INCLUDE_GUARD_INDEXING_ + +void rangePairs(const int from, const int to, int *pairs); + +#endif /* CVE_INCLUDE_GUARD_INDEXING_ */ diff --git a/CVE_C/src/init.c b/CVE_C/src/init.c new file mode 100644 index 0000000..e3a859f --- /dev/null +++ b/CVE_C/src/init.c @@ -0,0 +1,23 @@ +#include +#include +#include // for NULL +#include + +/* FIXME: + Check these declarations against the C/Fortran source code. +*/ + +/* .Call calls */ +extern SEXP grad_c(SEXP, SEXP, SEXP, SEXP, SEXP); + +static const R_CallMethodDef CallEntries[] = { + {"grad_c", (DL_FUNC) &grad_c, 5}, + {NULL, NULL, 0} +}; + +/* Restrict C entrypoints to registered routines. */ +void R_initCVE(DllInfo *dll) +{ + R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); + R_useDynamicSymbols(dll, FALSE); +} diff --git a/CVE_C/src/matrix.c b/CVE_C/src/matrix.c new file mode 100644 index 0000000..50a07ad --- /dev/null +++ b/CVE_C/src/matrix.c @@ -0,0 +1,71 @@ +#include // for `mem*` functions. 
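// Reviewer note (reference only, not part of the original sources): the R
// equivalents of the BLAS wrappers in this file are
//     matrixprod(A, B)         ~  A %*% B
//     crossprod(A, B)          ~  crossprod(A, B), i.e. t(A) %*% B
//     nullProj(B)              ~  diag(nrow(B)) - tcrossprod(B)
//     skewSymRank2k(A, B, ..)  ~  alpha * (A %*% t(B) - B %*% t(A)) + beta * C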
+ +#include "config.h" +#include "matrix.h" + +#include + +void matrixprod(const double *A, const int nrowA, const int ncolA, + const double *B, const int nrowB, const int ncolB, + double *C) { + const double one = 1.0; + const double zero = 0.0; + + // DGEMM with parameterization: + // C <- A %*% B + F77_NAME(dgemm)("N", "N", &nrowA, &ncolB, &ncolA, + &one, A, &nrowA, B, &nrowB, + &zero, C, &nrowA); +} + +void crossprod(const double *A, const int nrowA, const int ncolA, + const double *B, const int nrowB, const int ncolB, + double *C) { + const double one = 1.0; + const double zero = 0.0; + + // DGEMM with parameterization: + // C <- t(A) %*% B + F77_NAME(dgemm)("T", "N", &ncolA, &ncolB, &nrowA, + &one, A, &nrowA, B, &nrowB, + &zero, C, &ncolA); +} + +void nullProj(const double *B, const int nrowB, const int ncolB, + double *Q) { + const double minusOne = -1.0; + const double one = 1.0; + + // Initialize as identity matrix. + memset(Q, 0, sizeof(double) * nrowB * nrowB); + double *Q_diag, *Q_end = Q + nrowB * nrowB; + for (Q_diag = Q; Q_diag < Q_end; Q_diag += nrowB + 1) { + *Q_diag = 1.0; + } + + // DGEMM with parameterization: + // C <- (-1.0 * B %*% t(B)) + C + F77_NAME(dgemm)("N", "T", &nrowB, &nrowB, &ncolB, + &minusOne, B, &nrowB, B, &nrowB, + &one, Q, &nrowB); +} + +// A dence skwe-symmetric rank 2 update. +// Perform the update +// C := alpha (A * B^T - B * A^T) + beta C +void skewSymRank2k(const int nrow, const int ncol, + double alpha, const double *A, const double *B, + double beta, + double *C) { + F77_NAME(dgemm)("N", "T", + &nrow, &nrow, &ncol, + &alpha, A, &nrow, B, &nrow, + &beta, C, &nrow); + + alpha *= -1.0; + beta = 1.0; + F77_NAME(dgemm)("N", "T", + &nrow, &nrow, &ncol, + &alpha, B, &nrow, A, &nrow, + &beta, C, &nrow); +} diff --git a/CVE_C/src/matrix.h b/CVE_C/src/matrix.h new file mode 100644 index 0000000..e9ce39b --- /dev/null +++ b/CVE_C/src/matrix.h @@ -0,0 +1,25 @@ + +/* Include Guard */ +#ifndef CVE_INCLUDE_GUARD_MATRIX_ +#define CVE_INCLUDE_GUARD_MATRIX_ + +void matrixprod(const double *A, const int nrowA, const int ncolA, + const double *B, const int nrowB, const int ncolB, + double *C); + +void crossprod(const double *A, const int nrowA, const int ncolA, + const double *B, const int nrowB, const int ncolB, + double *C); + +void nullProj(const double *B, const int nrowB, const int ncolB, + double *Q); + +// A dence skwe-symmetric rank 2 update. +// Perform the update +// C := alpha (A * B^T - B * A^T) + beta C +void skewSymRank2k(const int nrow, const int ncol, + double alpha, const double *A, const double *B, + double beta, + double *C); + +#endif /* CVE_INCLUDE_GUARD_MATRIX_ */ diff --git a/CVE_C/src/sums.c b/CVE_C/src/sums.c new file mode 100644 index 0000000..068aa55 --- /dev/null +++ b/CVE_C/src/sums.c @@ -0,0 +1,113 @@ +#include // for `mem*` functions. + +#include "config.h" +#include "sums.h" + +void rowSums(const double *A, const int nrow, const int ncol, + double *sum) { + int i, j, block_size, block_size_i; + const double *A_block = A; + const double *A_end = A + nrow * ncol; + + if (nrow > CVE_MEM_CHUNK_SIZE) { + block_size = CVE_MEM_CHUNK_SIZE; + } else { + block_size = nrow; + } + + // Iterate `(block_size_i, ncol)` submatrix blocks. + for (i = 0; i < nrow; i += block_size_i) { + // Reset `A` to new block beginning. + A = A_block; + // Take block size of eveything left and reduce to max size. 
+ block_size_i = nrow - i; + if (block_size_i > block_size) { + block_size_i = block_size; + } + // Compute first blocks column, + for (j = 0; j < block_size_i; ++j) { + sum[j] = A[j]; + } + // and sum the following columns to the first one. + for (A += nrow; A < A_end; A += nrow) { + for (j = 0; j < block_size_i; ++j) { + sum[j] += A[j]; + } + } + // Step one block forth. + A_block += block_size_i; + sum += block_size_i; + } +} + +void colSums(const double *A, const int nrow, const int ncol, + double *sum) { + int j; + double *sum_end = sum + ncol; + + memset(sum, 0, sizeof(double) * ncol); + for (; sum < sum_end; ++sum) { + for (j = 0; j < nrow; ++j) { + *sum += A[j]; + } + A += nrow; + } +} + +void rowSquareSums(const double *A, const int nrow, const int ncol, + double *sum) { + int i, j, block_size, block_size_i; + const double *A_block = A; + const double *A_end = A + nrow * ncol; + + if (nrow < CVE_MEM_CHUNK_SIZE) { + block_size = nrow; + } else { + block_size = CVE_MEM_CHUNK_SIZE; + } + + // Iterate `(block_size_i, ncol)` submatrix blocks. + for (i = 0; i < nrow; i += block_size_i) { + // Reset `A` to new block beginning. + A = A_block; + // Take block size of eveything left and reduce to max size. + block_size_i = nrow - i; + if (block_size_i > block_size) { + block_size_i = block_size; + } + // Compute first blocks column, + for (j = 0; j < block_size_i; ++j) { + sum[j] = A[j] * A[j]; + } + // and sum the following columns to the first one. + for (A += nrow; A < A_end; A += nrow) { + for (j = 0; j < block_size_i; ++j) { + sum[j] += A[j] * A[j]; + } + } + // Step one block forth. + A_block += block_size_i; + sum += block_size_i; + } +} + +void rowSumsSymVec(const double *Avec, const int nrow, + const double diag, + double *sum) { + int i, j; + + if (diag == 0.0) { + memset(sum, 0, nrow * sizeof(double)); + } else { + for (i = 0; i < nrow; ++i) { + sum[i] = diag; + } + } + + for (j = 0; j < nrow; ++j) { + for (i = j + 1; i < nrow; ++i, ++Avec) { + sum[j] += *Avec; + sum[i] += *Avec; + } + } +} diff --git a/CVE_C/src/sums.h b/CVE_C/src/sums.h new file mode 100644 index 0000000..f409487 --- /dev/null +++ b/CVE_C/src/sums.h @@ -0,0 +1,19 @@ + +/* Include Guard */ +#ifndef CVE_INCLUDE_GUARD_SUMS_ +#define CVE_INCLUDE_GUARD_SUMS_ + +void rowSums(const double *A, const int nrow, const int ncol, + double *sum); + +void colSums(const double *A, const int nrow, const int ncol, + double *sum); + +void rowSquareSums(const double *A, const int nrow, const int ncol, + double *sum); + +void rowSumsSymVec(const double *Avec, const int nrow, + const double diag, + double *sum); + +#endif /* CVE_INCLUDE_GUARD_SUMS_ */ diff --git a/CVE_C/src/sweep.c b/CVE_C/src/sweep.c new file mode 100644 index 0000000..8678e92 --- /dev/null +++ b/CVE_C/src/sweep.c @@ -0,0 +1,113 @@ +#include // for `error`. + +#include "config.h" +#include "sweep.h" + +/* C[, j] = A[, j] * v for each j = 1 to ncol */ +void rowSweep(const double *A, const int nrow, const int ncol, + const char* op, + const double *v, // vector of length nrow + double *C) { + int i, j, block_size, block_size_i; + const double *A_block = A; + double *C_block = C; + const double *A_end = A + nrow * ncol; + + if (nrow > CVE_MEM_CHUNK_SMALL) { // small because 3 vectors in cache + block_size = CVE_MEM_CHUNK_SMALL; + } else { + block_size = nrow; + } + + if (*op == '+') { + // Iterate `(block_size_i, ncol)` submatrix blocks. + for (i = 0; i < nrow; i += block_size_i) { + // Set `A` and `C` to block beginning. 
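            // Reviewer note (R reference for the helpers in sums.c and this
            // file, not part of the original sources):
            //   rowSums(A), colSums(A), rowSquareSums(A)
            //       ~  rowSums(A), colSums(A), rowSums(A^2)
            //   rowSumsSymVec(Avec, n, d)
            //       ~  row sums of the symmetric n x n matrix whose strict
            //          lower triangle (column-major) is `Avec` and whose
            //          diagonal entries all equal `d`
            //   rowSweep(A, nrow, ncol, op, v)
            //       ~  sweep(A, 1, v, op), i.e. `v` has length nrow and is
            //          applied down each column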
+ A = A_block; + C = C_block; + // Get current block's row size. + block_size_i = nrow - i; + if (block_size_i > block_size) { + block_size_i = block_size; + } + // Perform element wise operation for block. + for (; A < A_end; A += nrow, C += nrow) { + for (j = 0; j < block_size_i; ++j) { + C[j] = A[j] + v[j]; // FUN = '+' + } + } + // Step one block forth. + A_block += block_size_i; + C_block += block_size_i; + v += block_size_i; + } + } else if (*op == '-') { + // Iterate `(block_size_i, ncol)` submatrix blocks. + for (i = 0; i < nrow; i += block_size_i) { + // Set `A` and `C` to block beginning. + A = A_block; + C = C_block; + // Get current block's row size. + block_size_i = nrow - i; + if (block_size_i > block_size) { + block_size_i = block_size; + } + // Perform element wise operation for block. + for (; A < A_end; A += nrow, C += nrow) { + for (j = 0; j < block_size_i; ++j) { + C[j] = A[j] - v[j]; // FUN = '-' + } + } + // Step one block forth. + A_block += block_size_i; + C_block += block_size_i; + v += block_size_i; + } + } else if (*op == '*') { + // Iterate `(block_size_i, ncol)` submatrix blocks. + for (i = 0; i < nrow; i += block_size_i) { + // Set `A` and `C` to block beginning. + A = A_block; + C = C_block; + // Get current block's row size. + block_size_i = nrow - i; + if (block_size_i > block_size) { + block_size_i = block_size; + } + // Perform element wise operation for block. + for (; A < A_end; A += nrow, C += nrow) { + for (j = 0; j < block_size_i; ++j) { + C[j] = A[j] * v[j]; // FUN = '*' + } + } + // Step one block forth. + A_block += block_size_i; + C_block += block_size_i; + v += block_size_i; + } + } else if (*op == '/') { + // Iterate `(block_size_i, ncol)` submatrix blocks. + for (i = 0; i < nrow; i += block_size_i) { + // Set `A` and `C` to block beginning. + A = A_block; + C = C_block; + // Get current block's row size. + block_size_i = nrow - i; + if (block_size_i > block_size) { + block_size_i = block_size; + } + // Perform element wise operation for block. + for (; A < A_end; A += nrow, C += nrow) { + for (j = 0; j < block_size_i; ++j) { + C[j] = A[j] / v[j]; // FUN = '/' + } + } + // Step one block forth. + A_block += block_size_i; + C_block += block_size_i; + v += block_size_i; + } + } else { + error("Got unknown 'op' (opperation) argument."); + } +} diff --git a/CVE_C/src/sweep.h b/CVE_C/src/sweep.h new file mode 100644 index 0000000..90be784 --- /dev/null +++ b/CVE_C/src/sweep.h @@ -0,0 +1,11 @@ + +/* Include Guard */ +#ifndef CVE_INCLUDE_GUARD_SWEEP_ +#define CVE_INCLUDE_GUARD_SWEEP_ + +void rowSweep(const double *A, const int nrow, const int ncol, + const char* op, + const double *v, // vector of length nrow + double *C); + +#endif /* CVE_INCLUDE_GUARD_SWEEP_ */ diff --git a/notes.md b/notes.md index 75602ba..ccb04a0 100644 --- a/notes.md +++ b/notes.md @@ -6,6 +6,35 @@ grep --include=*\.{c,h,R} -rnw '.' -e "sweep" ``` searches in all `C` source and header fils as well as `R` source files for the term _sweep_. +## Recursive dir. compair with colored sructure (more or less). +```bash +diff -r CVE_R/ CVE_C/ | grep -E "^([<>]|[^<>].*)" +``` + +## Parsing `bash` script parameters. 
+```bash
+usage="$0 [-v|--verbose] [-n|--dry-run] [(-s|--stack-size) <size>] [-h|--help] [-- [p1, [p2, ...]]]"
+verbose=false
+help=false
+dry_run=false
+stack_size=0
+
+while [ $# -gt 0 ]; do
+    case "$1" in
+        -v | --verbose )    verbose=true;    shift ;;
+        -n | --dry-run )    dry_run=true;    shift ;;
+        -s | --stack-size ) stack_size="$2"; shift; shift ;;
+        -h | --help )       echo "$usage";   exit ;;    # On help print usage and exit.
+        -- ) shift; break ;;                            # Break param "parsing".
+        * )  echo "$usage" >&2; exit 1 ;;               # Print usage and exit with failure.
+    esac
+done
+
+echo verbose=$verbose
+echo dry_run=$dry_run
+echo stack_size=$stack_size
+```
+
 # Development
 ## Build and install.
 To build the package the `devtools` package is used. This also provides `roxygen2` which is used for documentation and authomatic creaton of the `NAMESPACE` file.
diff --git a/wip.R b/wip.R
index 87fb9e8..25eb66d 100644
--- a/wip.R
+++ b/wip.R
@@ -118,6 +118,17 @@ microbenchmark(
 )
 
 ## Matrix-Matrix opperation .call ---------------------------------------------
+transpose.c <- function(A) {
+    stopifnot(
+        is.matrix(A), is.numeric(A)
+    )
+    if (!is.double(A)) {
+        A <- matrix(as.double(A), nrow(A), ncol(A))
+    }
+
+    .Call('R_transpose', PACKAGE = 'wip', A)
+}
+
 matrixprod.c <- function(A, B) {
     stopifnot(
         is.matrix(A), is.numeric(A),
@@ -174,6 +185,14 @@ m <- 300
 A <- matrix(runif(n * k), n, k)
 B <- matrix(runif(k * m), k, m)
 
+stopifnot(
+    all.equal(t(A), transpose.c(A))
+)
+microbenchmark(
+    t(A),
+    transpose.c(A)
+)
+
 stopifnot(
     all.equal(A %*% B, matrixprod.c(A, B))
 )
@@ -326,4 +345,4 @@ stopifnot(all.equal(
 microbenchmark(
     grad = grad(X, Y, V, h),
     gradient.c = gradient.c(X, X_diff, Y, V, h)
-)
\ No newline at end of file
+)
diff --git a/wip.c b/wip.c
index 0c764eb..75c77d6 100644
--- a/wip.c
+++ b/wip.c
@@ -1,4 +1,5 @@
 #include
+#include <string.h> // for `mem*` functions.
 #include
 #include
@@ -99,15 +100,15 @@ static inline void rowSquareSums(const double *A,
 }
 
 static inline void rowSumsSymVec(const double *Avec, const int nrow,
-                                 const double *diag,
+                                 const double diag,
                                  double *sum) {
     int i, j;
 
-    if (*diag == 0.0) {
+    if (diag == 0.0) {
         memset(sum, 0, nrow * sizeof(double));
     } else {
         for (i = 0; i < nrow; ++i) {
-            sum[i] = *diag;
+            sum[i] = diag;
         }
     }
@@ -228,6 +229,18 @@ static void rowSweep(const double *A, const int nrow, const int ncol,
 }
 
+void transpose(const double *A, const int nrow, const int ncol, double* T) {
+    int i, j, len = nrow * ncol;
+
+    // Filling column-wise and accessing row-wise.
+    for (i = 0, j = 0; i < len; ++i, j += nrow) {
+        if (j >= len) {
+            j -= len - 1;
+        }
+        T[i] = A[j];
+    }
+}
+
 static inline void matrixprod(const double *A, const int nrowA, const int ncolA,
                               const double *B, const int nrowB, const int ncolB,
                               double *C) {
@@ -363,7 +376,6 @@ static void gradient(const int n, const int p, const int q,
     // Number of X_i to X_j not trivial pairs.
     int i, N = (n * (n - 1)) / 2;
    double scale = -0.5 / h;
-    const double one = 1.0;
 
     if (X_diff == (void*)0) {
         // TODO: ...
@@ -391,7 +403,7 @@
         vecW[i] = gaussKernel(vecD[i], scale);
     }
     double *colSums = X_proj + N; // still allocated!
-    rowSumsSymVec(vecW, n, &one, colSums); // rowSums = colSums cause Sym
+    rowSumsSymVec(vecW, n, 1.0, colSums); // rowSums = colSums cause Sym
 
     // compute weighted responces of first end second momontum, aka y1, y2.
     double *y1 = X_proj + N + n;
diff --git a/wip.h b/wip.h
index a2d03f8..1e1f7dc 100644
--- a/wip.h
+++ b/wip.h
@@ -1,5 +1,5 @@
-#ifndef _CVE_INCLUDE_GUARD_
-#define _CVE_INCLUDE_GUARD_
+#ifndef CVE_INCLUDE_GUARD_
+#define CVE_INCLUDE_GUARD_
 
 #include
 
@@ -41,12 +41,12 @@
 static inline void rowSumsSymVec(const double *Avec, const int nrow,
-                                 const double *diag,
+                                 const double diag,
                                  double *sum);
 SEXP R_rowSumsSymVec(SEXP Avec, SEXP nrow, SEXP diag) {
     SEXP sum = PROTECT(allocVector(REALSXP, *INTEGER(nrow)));
 
-    rowSumsSymVec(REAL(Avec), *INTEGER(nrow), REAL(diag), REAL(sum));
+    rowSumsSymVec(REAL(Avec), *INTEGER(nrow), *REAL(diag), REAL(sum));
 
     UNPROTECT(1);
     return sum;
@@ -67,6 +67,16 @@ SEXP R_rowSweep(SEXP A, SEXP v, SEXP op) {
     return C;
 }
 
+void transpose(const double *A, const int nrow, const int ncol, double* T);
+SEXP R_transpose(SEXP A) {
+    SEXP T = PROTECT(allocMatrix(REALSXP, ncols(A), nrows(A)));
+
+    transpose(REAL(A), nrows(A), ncols(A), REAL(T));
+
+    UNPROTECT(1); /* T */
+    return T;
+}
+
 static inline void matrixprod(const double *A, const int nrowA, const int ncolA,
                               const double *B, const int nrowB, const int ncolB,
                               double *C);
@@ -156,4 +166,4 @@ SEXP R_gradient(SEXP X, SEXP X_diff, SEXP Y, SEXP V, SEXP h) {
     return G;
 }
 
-#endif /* _CVE_INCLUDE_GUARD_ */
+#endif /* CVE_INCLUDE_GUARD_ */
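
The new `rowSweep` kernel computes `C[i, j] = A[i, j] op v[i]` for `op` in `+`, `-`, `*`, `/`, which is base R's `sweep(A, 1, v, FUN)` over rows. A minimal sanity check in the style of the helpers already in `wip.R` is sketched below. It assumes the `wip` shared object is loaded (as in `wip.R`) and that the `R_rowSweep` wrapper declared in `wip.h` takes the operator as a length-1 character string; the wrapper body is not shown in the diff, so that call convention is an assumption.

```R
# Sketch only: the `R_rowSweep` call convention (operator passed as a
# length-1 character string) is assumed, not confirmed by the diff.
rowSweep.c <- function(A, v, op = '-') {
    stopifnot(
        is.matrix(A), is.numeric(A),
        is.numeric(v), length(v) == nrow(A)
    )
    if (!is.double(A)) {
        A <- matrix(as.double(A), nrow(A), ncol(A))
    }
    .Call('R_rowSweep', PACKAGE = 'wip', A, as.double(v), op)
}

n <- 200
p <- 12
A <- matrix(runif(n * p), n, p)
v <- runif(n)

# rowSweep computes C[i, j] = A[i, j] op v[i], i.e. sweep() over rows.
stopifnot(
    all.equal(sweep(A, 1, v, '-'), rowSweep.c(A, v, '-')),
    all.equal(sweep(A, 1, v, '/'), rowSweep.c(A, v, '/'))
)
```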
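
`rowSumsSymVec` consumes only the strict lower triangle of a symmetric matrix, stored column-major in `Avec`, plus a constant diagonal, and returns the row sums of the full matrix; `gradient` uses it to get the kernel weight column sums (row sums equal column sums by symmetry). A hedged check of that storage convention against a plain R reconstruction, again assuming the `wip` shared object is loaded:

```R
# Reference: rebuild the full symmetric matrix from the packed strict
# lower triangle (column-major) and a constant diagonal, then take rowSums.
rowSumsSymVec.ref <- function(Avec, n, diag) {
    S <- matrix(diag, n, n)                # constant value; off-diagonals overwritten below
    S[lower.tri(S)] <- Avec                # fill strict lower triangle (column-major)
    S[upper.tri(S)] <- t(S)[upper.tri(S)]  # mirror onto the upper triangle
    rowSums(S)
}

n <- 7
Avec <- runif(n * (n - 1) / 2)

stopifnot(all.equal(
    rowSumsSymVec.ref(Avec, n, 1.0),
    .Call('R_rowSumsSymVec', PACKAGE = 'wip', Avec, as.integer(n), 1.0)
))
```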