
- update: doc,
- wip: cleanup,
- update: datasets,
- remove: old code
This commit is contained in:
Daniel Kapla 2019-12-16 17:34:35 +01:00
parent 300fc11f3f
commit 9edefe994d
84 changed files with 812 additions and 4077 deletions

View File

@ -9,15 +9,10 @@ export(cve)
export(cve.call) export(cve.call)
export(dataset) export(dataset)
export(directions) export(directions)
export(elem.pairs)
export(estimate.bandwidth) export(estimate.bandwidth)
export(null) export(null)
export(predict_dim) export(predict_dim)
export(projTangentStiefel)
export(rStiefel) export(rStiefel)
export(retractStiefel)
export(skew)
export(sym)
import(stats) import(stats)
importFrom(graphics,boxplot) importFrom(graphics,boxplot)
importFrom(graphics,lines) importFrom(graphics,lines)

View File

@ -20,7 +20,7 @@
#' zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g} #' zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
#' is an unknown, continuous non-constant function, #' is an unknown, continuous non-constant function,
#' and \eqn{B = (b_1, ..., b_k)} is #' and \eqn{B = (b_1, ..., b_k)} is
#' a real \eqn{p \times k}{p x k} of rank \eqn{k <= p}{k \leq p}. #' a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}.
#' Without loss of generality \eqn{B} is assumed to be orthonormal. #' Without loss of generality \eqn{B} is assumed to be orthonormal.
#' #'
#' @author Daniel Kapla, Lukas Fertl, Bura Efstathia #' @author Daniel Kapla, Lukas Fertl, Bura Efstathia
@ -36,26 +36,46 @@
#' @inherit CVE-package description #' @inherit CVE-package description
#' #'
#' @param formula an object of class \code{"formula"} which is a symbolic #' @param formula an object of class \code{"formula"} which is a symbolic
#' description of the model to be fitted. #' description of the model to be fitted, e.g. \eqn{Y\sim X}{Y ~ X}, where
#' \eqn{Y} is an \eqn{n}-dimensional vector of the response variable and
#' \eqn{X} is an \eqn{n\times p}{n x p} matrix of the predictors.
#' @param data an optional data frame, containing the data for the formula if #' @param data an optional data frame, containing the data for the formula if
#' supplied. #' supplied, e.g. \code{data <- data.frame(Y, X)}, with dimension
#' @param method specifies the CVE method variation as one of #' \eqn{n \times (p + 1)}{n x (p + 1)}. By default the variables are taken from
#' the environment from which \code{cve} is called.
#' @param method This character string specifies the method of fitting. The
#' options are
#' \itemize{ #' \itemize{
#' \item "simple" exact implementation as described in the paper listed #' \item "simple" implementation as described in the paper.
#' below. #' \item "weighted" variation with adaptive weighting of slices.
#' \item "weighted" variation with addaptive weighting of slices.
#' } #' }
#' see the paper for details.
#' @param max.dim upper bound for \code{k} (ignored if \code{k} is supplied). #' @param max.dim upper bound for \code{k} (ignored if \code{k} is supplied).
#' @param ... Parameters passed on to \code{cve.call}. #' @param ... optional parameters passed on to \code{cve.call}.
#' #'
#' @return an S3 object of class \code{cve} with components: #' @return an S3 object of class \code{cve} with components:
#' \describe{ #' \describe{
#' \item{X}{Original training data,} #' \item{X}{design matrix of the predictors used for calculating the
#' \item{Y}{Responce of original training data,} #' cve-estimate,}
#' \item{Y}{\eqn{n}-dimensional vector of responses used for calculating
#' the cve-estimate,}
#' \item{method}{Name of used method,} #' \item{method}{Name of used method,}
#' \item{call}{the matched call,} #' \item{call}{the matched call,}
#' \item{res}{list of components \code{V, L, B, loss, h} and \code{k} for #' \item{res}{list of components \code{V, L, B, loss, h} for
#' each \eqn{k=min.dim,...,max.dim} (dimension).} #' each \code{k = min.dim, ..., max.dim}. If \code{k} was supplied in the
#' call, then \code{min.dim = max.dim = k}.
#' \itemize{
#' \item \code{B} is the cve-estimate with dimension
#' \eqn{p\times k}{p x k}.
#' \item \code{V} is the orthogonal complement of \eqn{B}.
#' \item \code{L} is the loss for each sample separately, such that
#' its mean is \code{loss}.
#' \item \code{loss} is the value of the target function that is
#' minimized, evaluated at \eqn{V}.
#' \item \code{h} is the bandwidth parameter used to calculate
#' \code{B, V, loss, L}.
#' }
#' }
#' } #' }
#' #'
#' @examples #' @examples
@ -66,7 +86,7 @@
#' b1 <- rep(1 / sqrt(p), p) #' b1 <- rep(1 / sqrt(p), p)
#' b2 <- (-1)^seq(1, p) / sqrt(p) #' b2 <- (-1)^seq(1, p) / sqrt(p)
#' B <- cbind(b1, b2) #' B <- cbind(b1, b2)
#' # samplsize #' # sample size
#' n <- 200 #' n <- 200
#' set.seed(21) #' set.seed(21)
#' # create predictor data x ~ N(0, I_p) #' # create predictor data x ~ N(0, I_p)
@ -139,10 +159,12 @@ cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
#' @inherit cve title #' @inherit cve title
#' @inherit cve description #' @inherit cve description
#' #'
#' @param X Design matrix with dimension \eqn{n\times p}{n x p}.
#' @param Y numeric array of responses of length \eqn{n}.
#' @param h bandwidth or function to estimate the bandwidth, defaults to the
#' internally estimated bandwidth.
#' @param nObs parameter for choosing bandwidth \code{h} using #' @param nObs parameter for choosing bandwidth \code{h} using
#' \code{\link{estimate.bandwidth}} (ignored if \code{h} is supplied). #' \code{\link{estimate.bandwidth}} (ignored if \code{h} is supplied).
#' @param X data matrix with samples in its rows.
#' @param Y Responses (1 dimensional).
#' @param method specifies the CVE method variation as one of #' @param method specifies the CVE method variation as one of
#' \itemize{ #' \itemize{
#' \item "simple" exact implementation as described in the paper listed #' \item "simple" exact implementation as described in the paper listed
@ -156,19 +178,23 @@ cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
#' @param tau Initial step-size. #' @param tau Initial step-size.
#' @param tol Tolerance for break condition. #' @param tol Tolerance for break condition.
#' @param max.iter maximum number of optimization steps. #' @param max.iter maximum number of optimization steps.
#' @param attempts number of arbitrary different starting points. #' @param attempts If \code{V.init} is not supplied, the optimization is
#' @param logger a logger function (only for advanced user, significantly slows #' carried out \code{attempts} times with starting values drawn from the
#' down the computation). #' invariant measure on the Stiefel manifold (see \code{\link{rStiefel}}).
#' @param h bandwidth or function to estimate bandwidth, defaults to internaly #' @param momentum number in \eqn{[0, 1)} giving the ratio of momentum for the
#' estimated bandwidth. #' euclidean gradient update with a momentum term. \code{momentum = 0}
#' @param momentum number of [0, 1) giving the ration of momentum for eucledian #' corresponds to normal gradient descent.
#' gradient update with a momentum term.
#' @param slack Positive scaling to allow small increases of the loss while #' @param slack Positive scaling to allow small increases of the loss while
#' optimizing. #' optimizing, i.e. \code{slack = 0.1} allows the target function to
#' @param gamma step-size reduction multiple. #' increase up to \eqn{10 \%} in one optimization step.
#' @param gamma step-size reduction multiple. If gradient step with step size
#' \code{tau} is not accepted \code{gamma * tau} is set to the next step
#' size.
#' @param V.init Semi-orthogonal matrix of dimensions `(ncol(X), ncol(X) - k)` #' @param V.init Semi-orthogonal matrix of dimensions
#' as optimization starting value. (If supplied, \code{attempts} is #' \code{(ncol(X), ncol(X) - k)} used as starting value in the optimization.
#' set to 1 and \code{k} to match dimension) #' (If supplied, \code{attempts} is set to 0 and \code{k} to match dimension).
#' @param logger a logger function (only for advanced users, slows down the
#' computation).
#' #'
#' @inherit cve return #' @inherit cve return
#' #'
@ -253,6 +279,7 @@ cve.call <- function(X, Y, method = "simple",
stop("Dimension missmatch of 'V.init' and 'X'") stop("Dimension missmatch of 'V.init' and 'X'")
} }
min.dim <- max.dim <- ncol(X) - ncol(V.init) min.dim <- max.dim <- ncol(X) - ncol(V.init)
storage.mode(V.init) <- "double"
attempts <- 0L attempts <- 0L
} else if (missing(k) || is.null(k)) { } else if (missing(k) || is.null(k)) {
min.dim <- as.integer(min.dim) min.dim <- as.integer(min.dim)
@ -320,6 +347,9 @@ cve.call <- function(X, Y, method = "simple",
} }
} }
# Convert numerical values to "double".
storage.mode(X) <- storage.mode(Y) <- "double"
if (is.function(logger)) { if (is.function(logger)) {
loggerEnv <- environment(logger) loggerEnv <- environment(logger)
} else { } else {
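For orientation, a minimal usage sketch of the interface documented above (a sketch only: it assumes the built CVE package is attached, uses the dataset() helper added below in this commit, and relies on variables being taken from the calling environment as documented for the formula interface):

# Fit CVE for several target dimensions and inspect the losses.
library(CVE)                 # assumption: the built package is installed
ds <- dataset("M1")          # simulated data, see R/datasets.R below
X <- ds$X; Y <- ds$Y
dr <- cve(Y ~ X, max.dim = 5)
summary(dr)                  # loss statistics for k = min.dim, ..., max.dim
plot(dr)                     # loss distribution elbow plot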

View File

@ -1,8 +1,10 @@
#' Gets estimated SDR basis. #' Gets estimated SDR basis.
#' #'
#' Returns the SDR basis matrix for SDR dimension(s). #' Returns the SDR basis matrix for dimension \code{k}, i.e. returns the
#' cve-estimate with dimension \eqn{p\times k}{p x k}.
#'
#' @param object instance of \code{cve} as output from \code{\link{cve}} or #' @param object instance of \code{cve} as output from \code{\link{cve}} or
#' \code{\link{cve.call}} #' \code{\link{cve.call}}.
#' @param k the SDR dimension. #' @param k the SDR dimension.
#' @param ... ignored. #' @param ... ignored.
#' #'

CVE/R/datasets.R (new file, 279 lines)
View File

@ -0,0 +1,279 @@
#' Multivariate Normal Distribution.
#'
#' Random generation for the multivariate normal distribution.
#' \deqn{X \sim N_p(\mu, \Sigma)}{X ~ N_p(\mu, \Sigma)}
#'
#' @param n number of samples.
#' @param mu mean vector.
#' @param sigma covariance matrix.
#'
#' @return a \eqn{n\times p}{n x p} matrix with samples in its rows.
#'
#' @examples
#' \dontrun{
#' rmvnorm(20, sigma = matrix(c(2, 1, 1, 2), 2))
#' rmvnorm(20, mu = c(3, -1, 2))
#' }
#' @keywords internal
rmvnorm <- function(n = 1, mu = rep(0, p), sigma = diag(p)) {
if (!missing(sigma)) {
p <- nrow(sigma)
} else if (!missing(mu)) {
mu <- matrix(mu, ncol = 1)
p <- nrow(mu)
} else {
stop("At least one of 'mu' or 'sigma' must be supplied.")
}
# See: https://en.wikipedia.org/wiki/Multivariate_normal_distribution
return(rep(mu, each = n) + matrix(rnorm(n * p), n) %*% chol(sigma))
}
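The sampler relies on the Cholesky identity: chol(sigma) returns the upper triangular factor R with Sigma = R'R, so the rows of Z R for standard normal Z have covariance Sigma. A quick empirical check (sketch):

set.seed(1)
S <- matrix(c(2, 1, 1, 2), 2)
X <- rmvnorm(10000, sigma = S)
round(cov(X), 1)   # should be close to S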
#' Multivariate t distribution.
#'
#' Random generation from multivariate t distribution (student distribution).
#'
#' @param n number of samples.
#' @param mu mean vector.
#' @param sigma a \eqn{k\times k}{k x k} positive definite matrix. If the
#' degrees of freedom \eqn{\nu} are bigger than 2, the resulting covariance is
#' \deqn{var(x) = \Sigma\frac{\nu}{\nu - 2}}
#' for \eqn{\nu > 2}.
#' @param df degrees of freedom \eqn{\nu}.
#'
#' @return a \eqn{n\times p}{n x p} matrix with samples in its rows.
#'
#' @examples
#' \dontrun{
#' rmvt(20, c(0, 1), matrix(c(3, 1, 1, 2), 2), 3)
#' rmvt(20, sigma = matrix(c(2, 1, 1, 2), 2), df = 3)
#' rmvt(20, mu = c(3, -1, 2), df = 3)
#' }
#' @keywords internal
rmvt <- function(n = 1, mu = rep(0, p), sigma = diag(p), df = Inf) {
if (!missing(sigma)) {
p <- nrow(sigma)
} else if (!missing(mu)) {
mu <- matrix(mu, ncol = 1)
p <- nrow(mu)
} else {
stop("At least one of 'mu' or 'sigma' must be supplied.")
}
if (df == Inf) {
Z <- 1
} else {
Z <- sqrt(df / rchisq(n, df))
}
return(rmvnorm(n, sigma = sigma) * Z + rep(mu, each = n))
}
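Because X = mu + Z * N(0, Sigma) with Z^2 = df / chisq(df), the sample covariance approaches Sigma * df / (df - 2) for df > 2, matching the sigma parameter description above; a quick check (sketch):

set.seed(1)
S <- matrix(c(2, 1, 1, 2), 2)
X <- rmvt(100000, sigma = S, df = 5)
round(cov(X) / (5 / (5 - 2)), 1)   # should be close to S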
#' Generalized Normal Distribution.
#'
#' Random generation for generalized Normal Distribution.
#'
#' @param n Number of generated samples.
#' @param mu mean.
#' @param alpha scale parameter.
#' @param beta shape parameter.
#'
#' @return numeric array of length \eqn{n}.
#'
#' @seealso https://en.wikipedia.org/wiki/Generalized_normal_distribution
#' @keywords internal
rgnorm <- function(n = 1, mu = 0, alpha = 1, beta = 1) {
if (alpha <= 0 || beta <= 0) {
stop("alpha and beta must be positive.")
}
lambda <- (1 / alpha)^beta
scales <- qgamma(runif(n), shape = 1 / beta, scale = 1 / lambda)^(1 / beta)
return(scales * ((-1)^rbinom(n, 1, 0.5)) + mu)
}
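Here |X - mu|^beta follows a Gamma(1 / beta, scale = alpha^beta) distribution, which gives Var(X) = alpha^2 * gamma(3 / beta) / gamma(1 / beta); this is exactly the relation dataset() inverts below when it picks alpha so that Var(epsilon) = sd^2. A quick check (sketch):

set.seed(1)
a <- 1.5; b <- 0.5
x <- rgnorm(100000, mu = 0, alpha = a, beta = b)
c(empirical = var(x), theoretical = a^2 * gamma(3 / b) / gamma(1 / b))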
#' Laplace distribution
#'
#' Random generation for Laplace distribution.
#'
#' @param n Number of generated samples.
#' @param mu mean.
#' @param sd standard deviation.
#'
#' @return numeric array of length \eqn{n}.
#'
#' @seealso https://en.wikipedia.org/wiki/Laplace_distribution
#' @keywords internal
rlaplace <- function(n = 1, mu = 0, sd = 1) {
U <- runif(n, -0.5, 0.5)
scale <- sd / sqrt(2)
return(mu - scale * sign(U) * log(1 - 2 * abs(U)))
}
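With scale b = sd / sqrt(2) the Laplace variance 2 * b^2 equals sd^2, so the sd argument is the actual standard deviation of the generated samples; for instance (sketch):

set.seed(1)
x <- rlaplace(100000, mu = 1, sd = 2)
c(mean(x), sd(x))   # should be close to (1, 2)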
#' Generates test datasets.
#'
#' Provides sample datasets M1-M7 used in the paper Conditional variance
#' estimation for sufficient dimension reduction, Lukas Fertl, Efstathia Bura.
#' The general model is given by:
#' \deqn{Y = g(B'X) + \epsilon}
#'
#' @param name One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"},
#' \code{"M5"}, \code{"M6"} or \code{"M7"}. Alternatively, just the dataset
#' number 1-7.
#' @param n number of samples.
#' @param p Dimension of random variable \eqn{X}.
#' @param sd standard deviation of the error term \eqn{\epsilon}.
#' @param ... Additional parameters only for "M2" (namely \code{pmix} and
#' \code{lambda}), see below.
#'
#' @return List with elements
#' \itemize{
#' \item{X}{data, a \eqn{n\times p}{n x p} matrix.}
#' \item{Y}{response.}
#' \item{B}{the dim-reduction matrix}
#' \item{name}{Name of the dataset (name parameter)}
#' }
#'
#' @section M1:
#' The predictors are distributed as
#' \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, \Sigma)} with
#' \eqn{\Sigma_{i, j} = 0.5^{|i - j|}}{\Sigma_ij = 0.5^|i - j|} for
#' \eqn{i, j = 1,..., p} for a subspace dimension of \eqn{k = 1} with a default
#' of \eqn{n = 100} data points. \eqn{p = 20},
#' \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)}, and \eqn{Y} is
#' given as \deqn{Y = cos(b_1'X) + \epsilon} where \eqn{\epsilon} is
#' distributed as generalized normal distribution with location 0,
#' shape-parameter 0.5, and the scale-parameter is chosen such that
#' \eqn{Var(\epsilon) = 0.25}.
#' @section M2:
#' The predictors are distributed as \eqn{X \sim Z 1_p \lambda + N_p(0, I_p)}{X ~ Z 1_p \lambda + N_p(0, I_p)} with
#' \eqn{Z \sim 2 Binom(p_{mix}) - 1\in\{-1, 1\}}{Z ~ 2 Binom(pmix) - 1} where
#' \eqn{1_p} is the \eqn{p}-dimensional vector of ones, for a subspace
#' dimension of \eqn{k = 1} with a default of \eqn{n = 100} data points.
#' \eqn{p = 20}, \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
#' and \eqn{Y} is \deqn{Y = cos(b_1'X) + 0.5\epsilon} where \eqn{\epsilon} is
#' standard normal.
#' The default for \code{pmix} is 0.3 and \code{lambda} defaults to 1.
#' @section M3:
#' The predictors are distributed as \eqn{X\sim N_p(0, I_p)}{X~N_p(0, I_p)}
#' for a subspace
#' dimension of \eqn{k = 1} with a default of \eqn{n = 100} data points.
#' \eqn{p = 20}, \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
#' and \eqn{Y} is
#' \deqn{Y = 2 log(|b_1'X| + 2) + 0.5\epsilon} where \eqn{\epsilon} is
#' standard normal.
#' @section M4:
#' The predictors are distributed as \eqn{X\sim N_p(0,\Sigma)}{X~N_p(0,\Sigma)}
#' with \eqn{\Sigma_{i, j} = 0.5^{|i - j|}}{\Sigma_ij = 0.5^|i - j|} for
#' \eqn{i, j = 1,..., p} for a subspace dimension of \eqn{k = 2} with a default
#' of \eqn{n = 200} data points. \eqn{p = 20},
#' \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
#' \eqn{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / sqrt(6)}
#' and \eqn{Y} is given as \deqn{Y = \frac{b_1'X}{0.5 + (1.5 + b_2'X)^2} + 0.5\epsilon}{Y = (b_1'X) / (0.5 + (1.5 + b_2'X)^2) + 0.5\epsilon}
#' where \eqn{\epsilon} is standard normal.
#' @section M5:
#' The predictors are distributed as \eqn{X\sim U([0,1]^p)}{X~U([0, 1]^p)}
#' where \eqn{U([0, 1]^p)} is the uniform distribution with
#' independent components on the \eqn{p}-dimensional hypercube for a subspace
#' dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points.
#' \eqn{p = 20},
#' \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
#' \eqn{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / sqrt(6)}
#' and \eqn{Y} is given as \deqn{Y = cos(\pi b_1'X)(b_2'X + 1)^2 + 0.5\epsilon}
#' where \eqn{\epsilon} is standard normal.
#' @section M6:
#' The predictors are distributed as \eqn{X\sim N_p(0, I_p)}{X~N_p(0, I_p)}
#' for a subspace dimension of \eqn{k = 3} with a default of \eqn{n = 200} data
#' points. \eqn{p = 20, b_1 = e_1, b_2 = e_2}, and \eqn{b_3 = e_p}, where
#' \eqn{e_j} is the \eqn{j}-th unit vector in the \eqn{p}-dimensional space.
#' \eqn{Y} is given as \deqn{Y = (b_1'X)^2+(b_2'X)^2+(b_3'X)^2+0.5\epsilon}
#' where \eqn{\epsilon} is standard normal.
#' @section M7:
#' The predictors are distributed as \eqn{X\sim t_3(I_p)}{X~t_3(I_p)} where
#' \eqn{t_3(I_p)} is the standard multivariate t-distribution with 3 degrees of
#' freedom, for a subspace dimension of \eqn{k = 4} with a default of
#' \eqn{n = 400} data points.
#' \eqn{p = 20, b_1 = e_1, b_2 = e_2, b_3 = e_3}, and \eqn{b_4 = e_p}, where
#' \eqn{e_j} is the \eqn{j}-th unit vector in the \eqn{p}-dimensional space.
#' \eqn{Y} is given as \deqn{Y = (b_1'X)(b_2'X)^2+(b_3'X)(b_4'X)+0.5\epsilon}
#' where \eqn{\epsilon} is distributed as generalized normal distribution with
#' location 0, shape-parameter 1, and the scale-parameter is chosen such that
#' \eqn{Var(\epsilon) = 0.25}.
#'
#' @references Fertl Lukas, Bura Efstathia. (2019), Conditional Variance
#' Estimation for Sufficient Dimension Reduction. Working Paper.
#'
#' @import stats
#' @importFrom stats rnorm rbinom
#' @export
dataset <- function(name = "M1", n = NULL, p = 20, sd = 0.5, ...) {
name <- toupper(name)
if (nchar(name) == 1) { name <- paste0("M", name) }
if (name == "M1") {
if (missing(n)) { n <- 100 }
# B ... `p x 1`
B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1)
X <- rmvnorm(n, sigma = 0.5^abs(outer(1:p, 1:p, FUN = `-`)))
beta <- 0.5
Y <- cos(X %*% B) + rgnorm(n, 0,
alpha = sqrt(sd^2 * gamma(1 / beta) / gamma(3 / beta)),
beta = beta
)
} else if (name == "M2") {
if (missing(n)) { n <- 100 }
params <- list(...)
pmix <- if (is.null(params$pmix)) { 0.3 } else { params$pmix }
lambda <- if (is.null(params$lambda)) { 1 } else { params$lambda }
# B ... `p x 1`
B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1)
Z <- 2 * rbinom(n, 1, pmix) - 1
X <- matrix(rep(lambda * Z, p) + rnorm(n * p), n)
Y <- cos(X %*% B) + rnorm(n, 0, sd)
} else if (name == "M3") {
if (missing(n)) { n <- 100 }
# B ... `p x 1`
B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1)
X <- matrix(rnorm(n * p), n)
Y <- 2 * log(2 + abs(X %*% B)) + rnorm(n, 0, sd)
} else if (name == "M4") {
if (missing(n)) { n <- 200 }
# B ... `p x 2`
B <- cbind(
c(rep(1 / sqrt(6), 6), rep(0, p - 6)),
c(rep(c(1, -1), 3) / sqrt(6), rep(0, p - 6))
)
X <- rmvnorm(n, sigma = 0.5^abs(outer(1:p, 1:p, FUN = `-`)))
XB <- X %*% B
Y <- (XB[, 1]) / (0.5 + (XB[, 2] + 1.5)^2) + rnorm(n, 0, sd)
} else if (name == "M5") {
if (missing(n)) { n <- 200 }
# B ... `p x 2`
B <- cbind(
c(rep(1, 6), rep(0, p - 6)),
c(rep(c(1, -1), 3), rep(0, p - 6))
) / sqrt(6)
X <- matrix(runif(n * p), n)
XB <- X %*% B
Y <- cos(XB[, 1] * pi) * (XB[, 2] + 1)^2 + rnorm(n, 0, sd)
} else if (name == "M6") {
if (missing(n)) { n <- 200 }
# B ... `p x 3`
B <- diag(p)[, -(3:(p - 1))]
X <- matrix(rnorm(n * p), n)
Y <- rowSums((X %*% B)^2) + rnorm(n, 0, sd)
} else if (name == "M7") {
if (missing(n)) { n <- 400 }
# B ... `p x 4`
B <- diag(p)[, -(4:(p - 1))]
# "R"andom "M"ulti"V"ariate "S"tudent
X <- rmvt(n = n, sigma = diag(p), df = 3)
XB <- X %*% B
Y <- (XB[, 1]) * (XB[, 2])^2 + (XB[, 3]) * (XB[, 4])
Y <- Y + rlaplace(n, 0, sd)
} else {
stop("Got unknown dataset name.")
}
return(list(X = X, Y = Y, B = B, name = name))
}
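A short usage sketch of the generator (M4 as an example; the component names follow the return value documented above):

set.seed(42)
ds <- dataset("M4")
dim(ds$X)                  # 200 x 20, the default n and p for M4
dim(ds$B)                  # 20 x 2, i.e. k = 2
round(crossprod(ds$B), 3)  # B is orthonormal: equals I_2 up to rounding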

View File

@ -5,9 +5,15 @@ directions <- function(dr, k) {
#' Computes projected training data \code{X} for given dimension `k`. #' Computes projected training data \code{X} for given dimension `k`.
#' #'
#' @param dr Instance of 'cve' as returned by \code{cve}. #' Projects the design matrix \eqn{X} onto the column space of the
#' cve-estimate for the given dimension \eqn{k}.
#'
#' @param dr Instance of \code{'cve'} as returned by \code{\link{cve}}.
#' @param k SDR dimension to use for projection. #' @param k SDR dimension to use for projection.
#' #'
#' @return the \eqn{n\times k}{n x k} dimensional matrix \eqn{X B} where \eqn{B}
#' is the cve-estimate for dimension \eqn{k}.
#'
#' @examples #' @examples
#' # create B for simulation (k = 1) #' # create B for simulation (k = 1)
#' B <- rep(1, 5) / sqrt(5) #' B <- rep(1, 5) / sqrt(5)

View File

@ -1,27 +1,22 @@
#' Bandwidth estimation for CVE. #' Bandwidth estimation for CVE.
#' #'
#' Estimates a bandwidth \code{h} according #' If no bandwidth or function for calculating it is supplied, the CVE method
#' defaults to using the following formula (version 1)
#' \deqn{% #' \deqn{%
#' h = (2 * tr(\Sigma) / p) * (1.2 * n^{-1 / (4 + k)})^2}{% #' h = \frac{2 tr(\Sigma)}{p} (1.2 n^{\frac{-1}{4 + k}})^2}{%
#' h = (2 * tr(\Sigma) / p) * (1.2 * n^(\frac{-1}{4 + k}))^2} #' h = (2 * tr(\Sigma) / p) * (1.2 * n^(-1 / (4 + k)))^2}
#' with \eqn{n} the sample size, \eqn{p} its dimension #' The alternative version 2, used for dimension prediction, is given by
#' (\code{n <- nrow(X); p <- ncol(X)}) and the covariance-matrix \eqn{\Sigma} #' \deqn{%
#' which is \code{(n-1)/n} times the sample covariance estimate. #' h = \frac{2 tr(\Sigma)}{p} \chi_k^{-1}(\frac{nObs - 1}{n - 1})}{%
#' h = (2 * tr(\Sigma) / p) * \chi_k^{-1}((nObs - 1) / (n - 1))}
#' with \eqn{n} the sample size, \eqn{p} its dimension and the
#' covariance-matrix \eqn{\Sigma}, which is \code{(n-1)/n} times the sample
#' covariance estimate.
#' #'
#' @param X data matrix with samples in its rows. #' @param X a \eqn{n\times p}{n x p} matrix with samples in its rows.
#' @param k Dimension of lower dimensional projection. #' @param k Dimension of lower dimensional projection.
#' @param nObs number of points in a slice, see \eqn{nObs} in CVE paper. #' @param nObs number of points in a slice, only for version 2.
#' @param version either \code{1} or \code{2}, where #' @param version either \code{1} or \code{2}.
#' \itemize{
#' \item 1: uses the following formula:
#' \deqn{%
#' h = (2 * tr(\Sigma) / p) * (1.2 * n^{-1 / (4 + k)})^2}{%
#' h = (2 * tr(\Sigma) / p) * (1.2 * n^(\frac{-1}{4 + k}))^2}
#' \item 2: uses
#' \deqn{%
#' h = (2 * tr(\Sigma) / p) * \chi_k^-1((nObs - 1) / (n - 1))}{%
#' h = (2 * tr(\Sigma) / p) * \chi_k^{-1}(\frac{nObs - 1}{n - 1})}
#' }
#' #'
#' @return Estimated bandwidth \code{h}. #' @return Estimated bandwidth \code{h}.
#' #'
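Spelled out in plain R, the two versions of the formula read as follows (a sketch of the documented formulas, not the package's internal implementation; reading \chi_k^{-1} as the chi-squared quantile function qchisq is an assumption):

bw.v1 <- function(X, k) {
  n <- nrow(X); p <- ncol(X)
  Sigma <- ((n - 1) / n) * cov(X)   # (n-1)/n times the sample covariance
  (2 * sum(diag(Sigma)) / p) * (1.2 * n^(-1 / (4 + k)))^2
}
bw.v2 <- function(X, k, nObs = sqrt(nrow(X))) {
  n <- nrow(X); p <- ncol(X)
  Sigma <- ((n - 1) / n) * cov(X)
  # chi_k^{-1} read as the quantile function of the chi-squared(k) law.
  (2 * sum(diag(Sigma)) / p) * qchisq((nObs - 1) / (n - 1), df = k)
}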

View File

@ -1,6 +1,9 @@
#' Loss distribution elbow plot. #' Loss distribution elbow plot.
#' #'
#' Boxplots of the loss from \code{min.dim} to \code{max.dim} \code{k} values. #' Boxplots of the output \code{L} from \code{\link{cve}} over \code{k} from
#' \code{min.dim} to \code{max.dim}. For given \code{k}, \code{L} corresponds
#' to \eqn{L_n(V, X_i)} where \eqn{V \in S(p, p - k)}{V} is the minimizer of
#' \eqn{L_n(V)}; see the paper for further details.
#' #'
#' @param x Object of class \code{"cve"} (result of [\code{\link{cve}}]). #' @param x Object of class \code{"cve"} (result of [\code{\link{cve}}]).
#' @param ... Pass through parameters to [\code{\link{plot}}] and #' @param ... Pass through parameters to [\code{\link{plot}}] and
@ -31,6 +34,9 @@
#' # elbow plot #' # elbow plot
#' plot(cve.obj.simple) #' plot(cve.obj.simple)
#' #'
#' @references Fertl Lukas, Bura Efstathia. (2019), Conditional Variance
#' Estimation for Sufficient Dimension Reduction. Working Paper.
#'
#' @seealso see \code{\link{par}} for graphical parameters to pass through #' @seealso see \code{\link{par}} for graphical parameters to pass through
#' as well as \code{\link{plot}}, the standard plot utility. #' as well as \code{\link{plot}}, the standard plot utility.
#' @method plot cve #' @method plot cve

View File

@ -1,6 +1,7 @@
#' Predict method for CVE Fits. #' Predict method for CVE Fits.
#' #'
#' Predict responces using reduced data with \code{\link{mars}}. #' Predict response using projected data where the forward model \eqn{g(B'X)}
#' is estimated using \code{\link{mars}}.
#' #'
#' @param object instance of class \code{cve} (result of \code{cve}, #' @param object instance of class \code{cve} (result of \code{cve},
#' \code{cve.call}). #' \code{cve.call}).
@ -36,7 +37,7 @@
#' #'
#' # plot prediction against y.test #' # plot prediction against y.test
#' plot(yhat, y.test) #' plot(yhat, y.test)
#' @seealso \code{\link{cve}}, \code{\link{cve.call}} or \pkg{\link{mars}}. #' @seealso \code{\link{cve}}, \code{\link{cve.call}} and \pkg{\link{mars}}.
#' #'
#' @rdname predict.cve #' @rdname predict.cve
#' #'

View File

@ -36,10 +36,6 @@ predict_dim_elbow <- function(object) {
# Get dimensions # Get dimensions
n <- nrow(X) n <- nrow(X)
p <- ncol(X) p <- ncol(X)
# Compute persistent data.
i = rep(1:n, n)
j = rep(1:n, each = n)
D.eucl = matrix((X[i, ] - X[j, ])^2 %*% rep(1, p), n)
losses <- vector("double", length(object$res)) losses <- vector("double", length(object$res))
names(losses) <- names(object$res) names(losses) <- names(object$res)
@ -48,16 +44,16 @@ predict_dim_elbow <- function(object) {
# extract dimension specific estimates and dimensions. # extract dimension specific estimates and dimensions.
k <- dr.k$k k <- dr.k$k
V <- dr.k$V V <- dr.k$V
q <- ncol(V) # estimate bandwidth according to the alternative formula.
# estimate bandwidth according alternative formula (see: TODO: see)
h <- estimate.bandwidth(X, k, sqrt(n), version = 2L) h <- estimate.bandwidth(X, k, sqrt(n), version = 2L)
# Projected `X` # Projected `X`
XV <- X %*% V XQ <- X %*% (diag(1, p) - tcrossprod(V)) # X (I - V V')
# Devectorized distance matrix # Compute distances
# (inefficient in R but fast in C) d2 <- tcrossprod(XQ) # XQ XQ'
D <- matrix((XV[i, , drop = F] - XV[j, , drop = F])^2 %*% rep(1, q), n) d1 <- matrix(diag(d2), n, n)
D <- D.eucl - D D <- d1 - 2 * d2 + t(d1)
# Apply kernel # Apply kernel
# Note: for d = ||Q(X_i - X_j)||, CVE uses the kernel exp(-d^4 / (2 h^2)).
K <- exp((-0.5 / h^2) * D^2) K <- exp((-0.5 / h^2) * D^2)
# sum columns # sum columns
colSumsK <- colSums(K) colSumsK <- colSums(K)
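The refactored code computes all pairwise squared distances of the projected rows with a single cross-product via the identity ||x_i - x_j||^2 = ||x_i||^2 - 2 x_i'x_j + ||x_j||^2; a small sketch checking this against dist():

set.seed(1)
Z <- matrix(rnorm(5 * 3), 5)
d2 <- tcrossprod(Z)                            # Z Z'
d1 <- matrix(diag(d2), 5, 5)
D <- d1 - 2 * d2 + t(d1)                       # squared distances, as above
max(abs(sqrt(D[lower.tri(D)]) - c(dist(Z))))   # ~ 0 up to rounding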
@ -81,11 +77,7 @@ predict_dim_wilcoxon <- function(object, p.value = 0.05) {
# Get dimensions # Get dimensions
n <- nrow(X) n <- nrow(X)
p <- ncol(X) p <- ncol(X)
# Compute persistent data.
i = rep(1:n, n)
j = rep(1:n, each = n)
D.eucl = matrix((X[i, ] - X[j, ])^2 %*% rep(1, p), n)
L <- matrix(NA, n, length(object$res)) L <- matrix(NA, n, length(object$res))
colnames(L) <- names(object$res) colnames(L) <- names(object$res)
# Compute per sample losses with alternative bandwidth for each dimension. # Compute per sample losses with alternative bandwidth for each dimension.
@ -93,16 +85,16 @@ predict_dim_wilcoxon <- function(object, p.value = 0.05) {
# extract dimension specific estimates and dimensions. # extract dimension specific estimates and dimensions.
k <- dr.k$k k <- dr.k$k
V <- dr.k$V V <- dr.k$V
q <- ncol(V) # estimate bandwidth according to the alternative formula.
# estimate bandwidth according alternative formula (see: TODO: see)
h <- estimate.bandwidth(X, k, sqrt(n), version = 2L) h <- estimate.bandwidth(X, k, sqrt(n), version = 2L)
# Projected `X` # Projected `X`
XV <- X %*% V XQ <- X %*% (diag(1, p) - tcrossprod(V)) # X (I - V V')
# Devectorized distance matrix # Compute distances
# (inefficient in R but fast in C) d2 <- tcrossprod(XQ) # XQ XQ'
D <- matrix((XV[i, , drop = F] - XV[j, , drop = F])^2 %*% rep(1, q), n) d1 <- matrix(diag(d2), n, n)
D <- D.eucl - D D <- d1 - 2 * d2 + t(d1)
# Apply kernel # Apply kernel
# Note: for d = ||Q(X_i - X_j)||, CVE uses the kernel exp(-d^4 / (2 h^2)).
K <- exp((-0.5 / h^2) * D^2) K <- exp((-0.5 / h^2) * D^2)
# sum columns # sum columns
colSumsK <- colSums(K) colSumsK <- colSums(K)
@ -130,27 +122,24 @@ predict_dim_wilcoxon <- function(object, p.value = 0.05) {
)) ))
} }
#' Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation. #' \code{"TODO: @Lukas"}
#' TODO: rewrite!!!
#' #'
#' @param object instance of class \code{cve} (result of \code{cve}, #' @param object instance of class \code{cve} (result of \code{\link{cve}},
#' \code{cve.call}). #' \code{\link{cve.call}}).
#' @param method one of \code{"CV"}, \code{"elbow"} or \code{"wilcoxon"}.
#' @param ... ignored. #' @param ... ignored.
#' #'
#' @return list with #' @return list with \code{"k"}, the predicted dimension, and
#' \itemize{ #' method-dependent information.
#' \item MSE: Mean Square Error,
#' \item k: predicted dimensions.
#' }
#' #'
#' @section cv: #' @section Method cv:
#' Cross-validation ... TODO: #' TODO: \code{"TODO: @Lukas"}.
#' #'
#' @section elbow: #' @section Method elbow:
#' Cross-validation ... TODO: #' TODO: \code{"TODO: @Lukas"}.
#' #'
#' @section wilcoxon: #' @section Method wilcoxon:
#' Cross-validation ... TODO: #' TODO: \code{"TODO: @Lukas"}.
#' #'
#' @examples #' @examples
#' # create B for simulation #' # create B for simulation

View File

@ -1,5 +1,9 @@
#' Prints a summary of a \code{cve} result. #' Prints a summary of a \code{cve} result.
#' @param object Instance of 'cve' as returned by \code{cve}. #'
#' Prints summary statistics of the output \code{L} from \code{cve} for
#' \code{k = min.dim, ..., max.dim}.
#'
#' @param object Instance of \code{"cve"} as returned by \code{\link{cve}}.
#' @param ... ignored. #' @param ... ignored.
#' #'
#' @examples #' @examples

CVE/R/util.R (new file, 24 lines)
View File

@ -0,0 +1,24 @@
#' Draws a sample from the invariant measure on the Stiefel manifold
#' \eqn{S(p, q)}.
#'
#' @param p row dimension
#' @param q col dimension
#' @return \eqn{p \times q}{p x q} semi-orthogonal matrix.
#' @examples
#' V <- rStiefel(6, 4)
#' @export
rStiefel <- function(p, q) {
return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))))
}
#' Null space basis of given matrix `V`
#'
#' @param V `(p, q)` matrix
#' @return Semi-orthogonal `(p, p - q)` matrix spanning the orthogonal
#' complement of the column space of `V`.
#' @keywords internal
#' @export
null <- function(V) {
tmp <- qr(V)
set <- if(tmp$rank == 0L) seq_len(ncol(V)) else -seq_len(tmp$rank)
return(qr.Q(tmp, complete = TRUE)[, set, drop = FALSE])
}
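The two helpers fit together: null(V) completes V to an orthonormal basis of R^p, so V'V = I, N'N = I and V'N = 0 for N = null(V). A quick check (sketch, assuming the package is loaded):

set.seed(1)
V <- rStiefel(6, 2)
N <- null(V)
max(abs(crossprod(V) - diag(2)))   # V is semi-orthogonal, ~ 0
max(abs(crossprod(V, N)))          # N spans the orthogonal complement, ~ 0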

View File

@ -26,7 +26,7 @@ variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g} zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
is an unknown, continuous non-constant function, is an unknown, continuous non-constant function,
and \eqn{B = (b_1, ..., b_k)} is and \eqn{B = (b_1, ..., b_k)} is
a real \eqn{p \times k}{p x k} of rank \eqn{k <= p}{k \leq p}. a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}.
Without loss of generality \eqn{B} is assumed to be orthonormal. Without loss of generality \eqn{B} is assumed to be orthonormal.
} }
\references{ \references{

View File

@ -8,7 +8,7 @@
} }
\arguments{ \arguments{
\item{object}{instance of \code{cve} as output from \code{\link{cve}} or \item{object}{instance of \code{cve} as output from \code{\link{cve}} or
\code{\link{cve.call}}} \code{\link{cve.call}}.}
\item{k}{the SDR dimension.} \item{k}{the SDR dimension.}
@ -18,7 +18,8 @@
dir the matrix of CS or CMS of given dimension dir the matrix of CS or CMS of given dimension
} }
\description{ \description{
Returns the SDR basis matrix for SDR dimension(s). Returns the SDR basis matrix for dimension \code{k}, i.e. returns the
cve-estimate with dimension \eqn{p\times k}{p x k}.
} }
\examples{ \examples{
# set dimensions for simulation model # set dimensions for simulation model

View File

@ -8,31 +8,51 @@ cve(formula, data, method = "simple", max.dim = 10L, ...)
} }
\arguments{ \arguments{
\item{formula}{an object of class \code{"formula"} which is a symbolic \item{formula}{an object of class \code{"formula"} which is a symbolic
description of the model to be fitted.} description of the model to be fitted, e.g. \eqn{Y\sim X}{Y ~ X}, where
\eqn{Y} is an \eqn{n}-dimensional vector of the response variable and
\eqn{X} is an \eqn{n\times p}{n x p} matrix of the predictors.}
\item{data}{an optional data frame, containing the data for the formula if \item{data}{an optional data frame, containing the data for the formula if
supplied.} supplied, e.g. \code{data <- data.frame(Y, X)}, with dimension
\eqn{n \times (p + 1)}{n x (p + 1)}. By default the variables are taken from
the environment from which \code{cve} is called.}
\item{method}{specifies the CVE method variation as one of \item{method}{This character string specifies the method of fitting. The
options are
\itemize{ \itemize{
\item "simple" exact implementation as described in the paper listed \item "simple" implementation as described in the paper.
below. \item "weighted" variation with adaptive weighting of slices.
\item "weighted" variation with addaptive weighting of slices. }
}} see the paper for details.}
\item{max.dim}{upper bound for \code{k} (ignored if \code{k} is supplied).} \item{max.dim}{upper bound for \code{k} (ignored if \code{k} is supplied).}
\item{...}{Parameters passed on to \code{cve.call}.} \item{...}{optional parameters passed on to \code{cve.call}.}
} }
\value{ \value{
an S3 object of class \code{cve} with components: an S3 object of class \code{cve} with components:
\describe{ \describe{
\item{X}{Original training data,} \item{X}{design matrix of the predictors used for calculating the
\item{Y}{Responce of original training data,} cve-estimate,}
\item{Y}{\eqn{n}-dimensional vector of responses used for calculating
the cve-estimate,}
\item{method}{Name of used method,} \item{method}{Name of used method,}
\item{call}{the matched call,} \item{call}{the matched call,}
\item{res}{list of components \code{V, L, B, loss, h} and \code{k} for \item{res}{list of components \code{V, L, B, loss, h} for
each \eqn{k=min.dim,...,max.dim} (dimension).} each \code{k = min.dim, ..., max.dim}. If \code{k} was supplied in the
call \code{min.dim = max.dim = k}.
\itemize{
\item \code{B} is the cve-estimate with dimension
\eqn{p\times k}{p x k}.
\item \code{V} is the orthogonal complement of \eqn{B}.
\item \code{L} is the loss for each sample separately, such that
its mean is \code{loss}.
\item \code{loss} is the value of the target function that is
minimized, evaluated at \eqn{V}.
\item \code{h} is the bandwidth parameter used to calculate
\code{B, V, loss, L}.
}
}
} }
} }
\description{ \description{
@ -56,7 +76,7 @@ variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g} zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
is an unknown, continuous non-constant function, is an unknown, continuous non-constant function,
and \eqn{B = (b_1, ..., b_k)} is and \eqn{B = (b_1, ..., b_k)} is
a real \eqn{p \times k}{p x k} of rank \eqn{k <= p}{k \leq p}. a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}.
Without loss of generality \eqn{B} is assumed to be orthonormal. Without loss of generality \eqn{B} is assumed to be orthonormal.
} }
\examples{ \examples{
@ -67,7 +87,7 @@ k <- 2
b1 <- rep(1 / sqrt(p), p) b1 <- rep(1 / sqrt(p), p)
b2 <- (-1)^seq(1, p) / sqrt(p) b2 <- (-1)^seq(1, p) / sqrt(p)
B <- cbind(b1, b2) B <- cbind(b1, b2)
# samplsize # sample size
n <- 200 n <- 200
set.seed(21) set.seed(21)
# create predictor data x ~ N(0, I_p) # create predictor data x ~ N(0, I_p)

View File

@ -10,9 +10,9 @@ cve.call(X, Y, method = "simple", nObs = sqrt(nrow(X)), h = NULL,
max.iter = 50L, attempts = 10L, logger = NULL) max.iter = 50L, attempts = 10L, logger = NULL)
} }
\arguments{ \arguments{
\item{X}{data matrix with samples in its rows.} \item{X}{Design matrix with dimension \eqn{n\times p}{n x p}.}
\item{Y}{Responses (1 dimensional).} \item{Y}{numeric array of responses of length \eqn{n}.}
\item{method}{specifies the CVE method variation as one of \item{method}{specifies the CVE method variation as one of
\itemize{ \itemize{
@ -34,38 +34,59 @@ estimated bandwidth.}
\item{k}{Dimension of lower dimensional projection, if \code{k} is given \item{k}{Dimension of lower dimensional projection, if \code{k} is given
only the specified dimension \code{B} matrix is estimated.} only the specified dimension \code{B} matrix is estimated.}
\item{momentum}{number of [0, 1) giving the ration of momentum for eucledian \item{momentum}{number in \eqn{[0, 1)} giving the ratio of momentum for the
gradient update with a momentum term.} euclidean gradient update with a momentum term. \code{momentum = 0}
corresponds to normal gradient descent.}
\item{tau}{Initial step-size.} \item{tau}{Initial step-size.}
\item{tol}{Tolerance for break condition.} \item{tol}{Tolerance for break condition.}
\item{slack}{Positive scaling to allow small increases of the loss while \item{slack}{Positive scaling to allow small increases of the loss while
optimizing.} optimizing, i.e. \code{slack = 0.1} allows the target function to
increase up to \eqn{10 \%} in one optimization step.}
\item{gamma}{step-size reduction multiple.} \item{gamma}{step-size reduction multiple. If a gradient step with step size
\code{tau} is not accepted, the step size is reduced to
\code{gamma * tau}.}
\item{V.init}{Semi-orthogonal matrix of dimensions `(ncol(X), ncol(X) - k)` \item{V.init}{Semi-orthogonal matrix of dimensions
as optimization starting value. (If supplied, \code{attempts} is \code{(ncol(X), ncol(X) - k)} used as starting value in the optimization.
set to 1 and \code{k} to match dimension)} (If supplied, \code{attempts} is set to 0 and \code{k} to match dimension).}
\item{max.iter}{maximum number of optimization steps.} \item{max.iter}{maximum number of optimization steps.}
\item{attempts}{number of arbitrary different starting points.} \item{attempts}{If \code{V.init} is not supplied, the optimization is carried
out \code{attempts} times with starting values drawn from the invariant
measure on the Stiefel manifold (see \code{\link{rStiefel}}).}
\item{logger}{a logger function (only for advanced user, significantly slows \item{logger}{a logger function (only for advanced users, slows down the
down the computation).} computation).}
} }
\value{ \value{
an S3 object of class \code{cve} with components: an S3 object of class \code{cve} with components:
\describe{ \describe{
\item{X}{Original training data,} \item{X}{design matrix of the predictors used for calculating the
\item{Y}{Responce of original training data,} cve-estimate,}
\item{Y}{\eqn{n}-dimensional vector of responses used for calculating
the cve-estimate,}
\item{method}{Name of used method,} \item{method}{Name of used method,}
\item{call}{the matched call,} \item{call}{the matched call,}
\item{res}{list of components \code{V, L, B, loss, h} and \code{k} for \item{res}{list of components \code{V, L, B, loss, h} for
each \eqn{k=min.dim,...,max.dim} (dimension).} each \code{k = min.dim, ..., max.dim}. If \code{k} was supplied in the
call, then \code{min.dim = max.dim = k}.
\itemize{
\item \code{B} is the cve-estimate with dimension
\eqn{p\times k}{p x k}.
\item \code{V} is the orthogonal complement of \eqn{B}.
\item \code{L} is the loss for each sample separately, such that
its mean is \code{loss}.
\item \code{loss} is the value of the target function that is
minimized, evaluated at \eqn{V}.
\item \code{h} is the bandwidth parameter used to calculate
\code{B, V, loss, L}.
}
}
} }
} }
\description{ \description{
@ -89,7 +110,7 @@ variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g} zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
is an unknown, continuous non-constant function, is an unknown, continuous non-constant function,
and \eqn{B = (b_1, ..., b_k)} is and \eqn{B = (b_1, ..., b_k)} is
a real \eqn{p \times k}{p x k} of rank \eqn{k <= p}{k \leq p}. a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}.
Without loss of generality \eqn{B} is assumed to be orthonormal. Without loss of generality \eqn{B} is assumed to be orthonormal.
} }
\examples{ \examples{

CVE/man/dataset.Rd (new file, 127 lines)
View File

@ -0,0 +1,127 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/datasets.R
\name{dataset}
\alias{dataset}
\title{Generates test datasets.}
\usage{
dataset(name = "M1", n = NULL, p = 20, sd = 0.5, ...)
}
\arguments{
\item{name}{One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"},
\code{"M5"}, \code{"M6"} or \code{"M7"}. Alternatively, just the dataset
number 1-7.}
\item{n}{number of samples.}
\item{p}{Dimension of random variable \eqn{X}.}
\item{sd}{standard deviation of the error term \eqn{\epsilon}.}
\item{...}{Additional parameters only for "M2" (namely \code{pmix} and
\code{lambda}), see below.}
}
\value{
List with elements
\itemize{
\item{X}{data, a \eqn{n\times p}{n x p} matrix.}
\item{Y}{response.}
\item{B}{the dim-reduction matrix}
\item{name}{Name of the dataset (name parameter)}
}
}
\description{
Provides sample datasets M1-M7 used in the paper Conditional variance
estimation for sufficient dimension reduction, Lukas Fertl, Efstathia Bura.
The general model is given by:
\deqn{Y = g(B'X) + \epsilon}
}
\section{M1}{
The predictors are distributed as
\eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, \Sigma)} with
\eqn{\Sigma_{i, j} = 0.5^{|i - j|}}{\Sigma_ij = 0.5^|i - j|} for
\eqn{i, j = 1,..., p} for a subspace dimension of \eqn{k = 1} with a default
of \eqn{n = 100} data points. \eqn{p = 20},
\eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)}, and \eqn{Y} is
given as \deqn{Y = cos(b_1'X) + \epsilon} where \eqn{\epsilon} is
distributed as generalized normal distribution with location 0,
shape-parameter 0.5, and the scale-parameter is chosen such that
\eqn{Var(\epsilon) = 0.25}.
}
\section{M2}{
The predictors are distributed as \eqn{X \sim Z 1_p \lambda + N_p(0, I_p)}{X ~ Z 1_p \lambda + N_p(0, I_p)} with
\eqn{Z \sim 2 Binom(p_{mix}) - 1\in\{-1, 1\}}{Z ~ 2 Binom(pmix) - 1} where
\eqn{1_p} is the \eqn{p}-dimensional vector of ones, for a subspace
dimension of \eqn{k = 1} with a default of \eqn{n = 100} data points.
\eqn{p = 20}, \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
and \eqn{Y} is \deqn{Y = cos(b_1'X) + 0.5\epsilon} where \eqn{\epsilon} is
standard normal.
The default for \code{pmix} is 0.3 and \code{lambda} defaults to 1.
}
\section{M3}{
The predictors are distributed as \eqn{X\sim N_p(0, I_p)}{X~N_p(0, I_p)}
for a subspace
dimension of \eqn{k = 1} with a default of \eqn{n = 100} data points.
\eqn{p = 20}, \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
and \eqn{Y} is
\deqn{Y = 2 log(|b_1'X| + 2) + 0.5\epsilon} where \eqn{\epsilon} is
standard normal.
}
\section{M4}{
The predictors are distributed as \eqn{X\sim N_p(0,\Sigma)}{X~N_p(0,\Sigma)}
with \eqn{\Sigma_{i, j} = 0.5^{|i - j|}}{\Sigma_ij = 0.5^|i - j|} for
\eqn{i, j = 1,..., p} for a subspace dimension of \eqn{k = 2} with a default
of \eqn{n = 200} data points. \eqn{p = 20},
\eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
\eqn{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / sqrt(6)}
and \eqn{Y} is given as \deqn{Y = \frac{b_1'X}{0.5 + (1.5 + b_2'X)^2} + 0.5\epsilon}{Y = (b_1'X) / (0.5 + (1.5 + b_2'X)^2) + 0.5\epsilon}
where \eqn{\epsilon} is standard normal.
}
\section{M5}{
The predictors are distributed as \eqn{X\sim U([0,1]^p)}{X~U([0, 1]^p)}
where \eqn{U([0, 1]^p)} is the uniform distribution with
independent components on the \eqn{p}-dimensional hypercube for a subspace
dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points.
\eqn{p = 20},
\eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
\eqn{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / sqrt(6)}
and \eqn{Y} is given as \deqn{Y = cos(\pi b_1'X)(b_2'X + 1)^2 + 0.5\epsilon}
where \eqn{\epsilon} is standard normal.
}
\section{M6}{
The predictors are distributed as \eqn{X\sim N_p(0, I_p)}{X~N_p(0, I_p)}
for a subspace dimension of \eqn{k = 3} with a default of \eqn{n = 200} data
points. \eqn{p = 20, b_1 = e_1, b_2 = e_2}, and \eqn{b_3 = e_p}, where
\eqn{e_j} is the \eqn{j}-th unit vector in the \eqn{p}-dimensional space.
\eqn{Y} is given as \deqn{Y = (b_1'X)^2+(b_2'X)^2+(b_3'X)^2+0.5\epsilon}
where \eqn{\epsilon} is standard normal.
}
\section{M7}{
The predictors are distributed as \eqn{X\sim t_3(I_p)}{X~t_3(I_p)} where
\eqn{t_3(I_p)} is the standard multivariate t-distribution with 3 degrees of
freedom, for a subspace dimension of \eqn{k = 4} with a default of
\eqn{n = 400} data points.
\eqn{p = 20, b_1 = e_1, b_2 = e_2, b_3 = e_3}, and \eqn{b_4 = e_p}, where
\eqn{e_j} is the \eqn{j}-th unit vector in the \eqn{p}-dimensional space.
\eqn{Y} is given as \deqn{Y = (b_1'X)(b_2'X)^2+(b_3'X)(b_4'X)+0.5\epsilon}
where \eqn{\epsilon} is distributed as generalized normal distribution with
location 0, shape-parameter 1, and the scale-parameter is chosen such that
\eqn{Var(\epsilon) = 0.25}.
}
\references{
Fertl Lukas, Bura Efstathia. (2019), Conditional Variance
Estimation for Sufficient Dimension Reduction. Working Paper.
}

View File

@ -8,12 +8,17 @@
\method{directions}{cve}(dr, k) \method{directions}{cve}(dr, k)
} }
\arguments{ \arguments{
\item{dr}{Instance of 'cve' as returned by \code{cve}.} \item{dr}{Instance of \code{'cve'} as returned by \code{\link{cve}}.}
\item{k}{SDR dimension to use for projection.} \item{k}{SDR dimension to use for projection.}
} }
\value{
the \eqn{n\times k}{n x k} dimensional matrix \eqn{X B} where \eqn{B}
is the cve-estimate for dimension \eqn{k}.
}
\description{ \description{
Computes projected training data \code{X} for given dimension `k`. Projects the design matrix \eqn{X} onto the column space of the
cve-estimate for the given dimension \eqn{k}.
} }
\examples{ \examples{
# create B for simulation (k = 1) # create B for simulation (k = 1)

View File

@ -4,26 +4,33 @@
\alias{estimate.bandwidth} \alias{estimate.bandwidth}
\title{Bandwidth estimation for CVE.} \title{Bandwidth estimation for CVE.}
\usage{ \usage{
estimate.bandwidth(X, k, nObs) estimate.bandwidth(X, k, nObs, version = 1L)
} }
\arguments{ \arguments{
\item{X}{data matrix with samples in its rows.} \item{X}{a \eqn{n\times p}{n x p} matrix with samples in its rows.}
\item{k}{Dimension of lower dimensional projection.} \item{k}{Dimension of lower dimensional projection.}
\item{nObs}{number of points in a slice, see \eqn{nObs} in CVE paper.} \item{nObs}{number of points in a slice, only for version 2.}
\item{version}{either \code{1} or \code{2}.}
} }
\value{ \value{
Estimated bandwidth \code{h}. Estimated bandwidth \code{h}.
} }
\description{ \description{
Estimates a bandwidth \code{h} according If no bandwidth or function for calculating it is supplied, the CVE method
defaults to using the following formula (version 1)
\deqn{% \deqn{%
h = (2 * tr(\Sigma) / p) * (1.2 * n^{-1 / (4 + k)})^2}{% h = \frac{2 tr(\Sigma)}{p} (1.2 n^{\frac{-1}{4 + k}})^2}{%
h = (2 * tr(\Sigma) / p) * (1.2 * n^(\frac{-1}{4 + k}))^2} h = (2 * tr(\Sigma) / p) * (1.2 * n^(-1 / (4 + k)))^2}
with \eqn{n} the sample size, \eqn{p} its dimension The alternative version 2, used for dimension prediction, is given by
(\code{n <- nrow(X); p <- ncol(X)}) and the covariance-matrix \eqn{\Sigma} \deqn{%
which is \code{(n-1)/n} times the sample covariance estimate. h = \frac{2 tr(\Sigma)}{p} \chi_k^{-1}(\frac{nObs - 1}{n - 1})}{%
h = (2 * tr(\Sigma) / p) * \chi_k^{-1}((nObs - 1) / (n - 1))}
with \eqn{n} the sample size, \eqn{p} its dimension and the
covariance-matrix \eqn{\Sigma}, which is \code{(n-1)/n} times the sample
covariance estimate.
} }
\examples{ \examples{
# set dimensions for simulation model # set dimensions for simulation model

View File

@ -13,7 +13,10 @@
[\code{\link{lines}}]} [\code{\link{lines}}]}
} }
\description{ \description{
Boxplots of the loss from \code{min.dim} to \code{max.dim} \code{k} values. Boxplots of the output \code{L} from \code{\link{cve}} over \code{k} from
\code{min.dim} to \code{max.dim}. For given \code{k}, \code{L} corresponds
to \eqn{L_n(V, X_i)} where \eqn{V \in S(p, p - k)}{V} is the minimizer of
\eqn{L_n(V)}; see the paper for further details.
} }
\examples{ \examples{
# create B for simulation # create B for simulation
@ -41,6 +44,10 @@ cve.obj.simple <- cve(Y ~ X, h = estimate.bandwidth, nObs = sqrt(nrow(X)))
# elbow plot # elbow plot
plot(cve.obj.simple) plot(cve.obj.simple)
}
\references{
Fertl Lukas, Bura Efstathia. (2019), Conditional Variance
Estimation for Sufficient Dimension Reduction. Working Paper.
} }
\seealso{ \seealso{
see \code{\link{par}} for graphical parameters to pass through see \code{\link{par}} for graphical parameters to pass through

View File

@ -20,7 +20,8 @@
predicted response of data \code{newdata}. predicted response of data \code{newdata}.
} }
\description{ \description{
Predict responces using reduced data with \code{\link{mars}}. Predict response using projected data where the forward model \eqn{g(B'X)}
is estimated using \code{\link{mars}}.
} }
\examples{ \examples{
# create B for simulation # create B for simulation
@ -50,5 +51,5 @@ yhat <- predict(cve.obj.simple, x.test, 1)
plot(yhat, y.test) plot(yhat, y.test)
} }
\seealso{ \seealso{
\code{\link{cve}}, \code{\link{cve.call}} or \pkg{\link{mars}}. \code{\link{cve}}, \code{\link{cve.call}} and \pkg{\link{mars}}.
} }

View File

@ -2,26 +2,40 @@
% Please edit documentation in R/predict_dim.R % Please edit documentation in R/predict_dim.R
\name{predict_dim} \name{predict_dim}
\alias{predict_dim} \alias{predict_dim}
\title{Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation.} \title{\code{"TODO: @Lukas"}}
\usage{ \usage{
predict_dim(object, ...) predict_dim(object, ..., method = "CV")
} }
\arguments{ \arguments{
\item{object}{instance of class \code{cve} (result of \code{cve}, \item{object}{instance of class \code{cve} (result of \code{\link{cve}},
\code{cve.call}).} \code{\link{cve.call}}).}
\item{...}{ignored.} \item{...}{ignored.}
\item{method}{one of \code{"CV"}, \code{"elbow"} or \code{"wilcoxon"}.}
} }
\value{ \value{
list with list with \code{"k"}, the predicted dimension, and
\itemize{ method-dependent information.
\item MSE: Mean Square Error,
\item k: predicted dimensions.
}
} }
\description{ \description{
Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation. \code{"TODO: @Lukas"}
} }
\section{Method cv}{
TODO: \code{"TODO: @Lukas"}.
}
\section{Method elbow}{
TODO: \code{"TODO: @Lukas"}.
}
\section{Method wilcoxon}{
TODO: \code{"TODO: @Lukas"}.
}
\examples{ \examples{
# create B for simulation # create B for simulation
B <- rep(1, 5) / sqrt(5) B <- rep(1, 5) / sqrt(5)

View File

@ -2,7 +2,8 @@
% Please edit documentation in R/util.R % Please edit documentation in R/util.R
\name{rStiefel} \name{rStiefel}
\alias{rStiefel} \alias{rStiefel}
\title{Draws a sample from the invariant measure on the Stiefel manifold \eqn{S(p, q)}.} \title{Draws a sample from the invariant measure on the Stiefel manifold
\eqn{S(p, q)}.}
\usage{ \usage{
rStiefel(p, q) rStiefel(p, q)
} }
@ -12,10 +13,11 @@ rStiefel(p, q)
\item{q}{col dimension} \item{q}{col dimension}
} }
\value{ \value{
\code{p} times \code{q} semi-orthogonal matrix. \eqn{p \times q}{p x q} semi-orthogonal matrix.
} }
\description{ \description{
Draws a sample from the invariant measure on the Stiefel manifold \eqn{S(p, q)}. Draws a sample from the invariant measure on the Stiefel manifold
\eqn{S(p, q)}.
} }
\examples{ \examples{
V <- rStiefel(6, 4) V <- rStiefel(6, 4)

CVE/man/rgnorm.Rd (new file, 27 lines)
View File

@ -0,0 +1,27 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/datasets.R
\name{rgnorm}
\alias{rgnorm}
\title{Generalized Normal Distribution.}
\usage{
rgnorm(n = 1, mu = 0, alpha = 1, beta = 1)
}
\arguments{
\item{n}{Number of generated samples.}
\item{mu}{mean.}
\item{alpha}{scale parameter.}
\item{beta}{shape parameter.}
}
\value{
numeric array of length \eqn{n}.
}
\description{
Random generation for generalized Normal Distribution.
}
\seealso{
https://en.wikipedia.org/wiki/Generalized_normal_distribution
}
\keyword{internal}

CVE/man/rlaplace.Rd (new file, 25 lines)
View File

@ -0,0 +1,25 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/datasets.R
\name{rlaplace}
\alias{rlaplace}
\title{Laplace distribution}
\usage{
rlaplace(n = 1, mu = 0, sd = 1)
}
\arguments{
\item{n}{Number of generated samples.}
\item{mu}{mean.}
\item{sd}{standard deviation.}
}
\value{
numeric array of length \eqn{n}.
}
\description{
Random generation for Laplace distribution.
}
\seealso{
https://en.wikipedia.org/wiki/Laplace_distribution
}
\keyword{internal}

CVE/man/rmvnorm.Rd (new file, 29 lines)
View File

@ -0,0 +1,29 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/datasets.R
\name{rmvnorm}
\alias{rmvnorm}
\title{Multivariate Normal Distribution.}
\usage{
rmvnorm(n = 1, mu = rep(0, p), sigma = diag(p))
}
\arguments{
\item{n}{number of samples.}
\item{mu}{mean vector.}
\item{sigma}{covariance matrix.}
}
\value{
a \eqn{n\times p}{n x p} matrix with samples in its rows.
}
\description{
Random generation for the multivariate normal distribution.
\deqn{X \sim N_p(\mu, \Sigma)}{X ~ N_p(\mu, \Sigma)}
}
\examples{
\dontrun{
rmvnorm(20, sigma = matrix(c(2, 1, 1, 2), 2))
rmvnorm(20, mu = c(3, -1, 2))
}
}
\keyword{internal}

CVE/man/rmvt.Rd (new file, 34 lines)
View File

@ -0,0 +1,34 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/datasets.R
\name{rmvt}
\alias{rmvt}
\title{Multivariate t distribution.}
\usage{
rmvt(n = 1, mu = rep(0, p), sigma = diag(p), df = Inf)
}
\arguments{
\item{n}{number of samples.}
\item{mu}{mean vector.}
\item{sigma}{a \eqn{k\times k}{k x k} positive definite matrix. If the
degrees of freedom \eqn{\nu} are bigger than 2, the resulting covariance is
\deqn{var(x) = \Sigma\frac{\nu}{\nu - 2}}
for \eqn{\nu > 2}.}
\item{df}{degrees of freedom \eqn{\nu}.}
}
\value{
a \eqn{n\times p}{n x p} matrix with samples in its rows.
}
\description{
Random generation from multivariate t distribution (student distribution).
}
\examples{
\dontrun{
rmvt(20, c(0, 1), matrix(c(3, 1, 1, 2), 2), 3)
rmvt(20, sigma = matrix(c(2, 1, 1, 2), 2), df = 3)
rmvt(20, mu = c(3, -1, 2), df = 3)
}
}
\keyword{internal}

View File

@ -7,12 +7,13 @@
\method{summary}{cve}(object, ...) \method{summary}{cve}(object, ...)
} }
\arguments{ \arguments{
\item{object}{Instance of 'cve' as returned by \code{cve}.} \item{object}{Instance of \code{"cve"} as returned by \code{\link{cve}}.}
\item{...}{ignored.} \item{...}{ignored.}
} }
\description{ \description{
Prints a summary of a \code{cve} result. Prints summary statistics of the output \code{L} from \code{cve} for
\code{k = min.dim, ..., max.dim}.
} }
\examples{ \examples{
# create B for simulation # create B for simulation

View File

@ -161,97 +161,3 @@ mat* adjacence(const mat *vec_L, const mat *vec_Y, const mat *vec_y1,
return mat_S; return mat_S;
} }
// int getWorkLen(const int n, const int p, const int q) {
// int mpq; /**< Max of p and q */
// int nn = ((n - 1) * n) / 2;
// if (p > q) {
// mpq = p;
// } else {
// mpq = q;
// }
// if (nn * p < (mpq + 1) * mpq) {
// return 2 * (mpq + 1) * mpq;
// } else {
// return (nn + mpq) * mpq;
// }
// }
// double cost(const unsigned int method,
// const int n,
// const double *Y,
// const double *vecK,
// const double *colSums,
// double *y1, double *L) {
// int i, j, k;
// double tmp, sum;
// for (i = 0; i < n; ++i) {
// y1[i] = Y[i];
// L[i] = Y[i] * Y[i];
// }
// for (k = j = 0; j < n; ++j) {
// for (i = j + 1; i < n; ++i, ++k) {
// y1[i] += Y[j] * vecK[k];
// y1[j] += Y[i] * vecK[k];
// L[i] += Y[j] * Y[j] * vecK[k];
// L[j] += Y[i] * Y[i] * vecK[k];
// }
// }
// for (i = 0; i < n; ++i) {
// y1[i] /= colSums[i];
// L[i] /= colSums[i];
// }
// tmp = 0.0;
// if (method == CVE_METHOD_WEIGHTED) {
// sum = 0.0;
// for (i = 0; i < n; ++i) {
// tmp += (colSums[i] - 1.0) * (L[i] -= y1[i] * y1[i]);
// sum += colSums[i];
// }
// return tmp / (sum - (double)n); // TODO: check for division by zero!
// } else {
// for (i = 0; i < n; ++i) {
// tmp += (L[i] -= y1[i] * y1[i]);
// }
// return tmp / (double)n;
// }
// }
// void scaling(const unsigned int method,
// const int n,
// const double *Y, const double *y1, const double *L,
// const double *vecD, const double *vecK,
// const double *colSums,
// double *vecS) {
// int i, j, k, nn = (n * (n - 1)) / 2;
// double tmp;
// if (method == CVE_METHOD_WEIGHTED) {
// for (k = j = 0; j < n; ++j) {
// for (i = j + 1; i < n; ++i, ++k) {
// tmp = Y[j] - y1[i];
// vecS[k] = (L[i] - (tmp * tmp));
// tmp = Y[i] - y1[j];
// vecS[k] += (L[j] - (tmp * tmp));
// }
// }
// } else {
// for (k = j = 0; j < n; ++j) {
// for (i = j + 1; i < n; ++i, ++k) {
// tmp = Y[j] - y1[i];
// vecS[k] = (L[i] - (tmp * tmp)) / colSums[i];
// tmp = Y[i] - y1[j];
// vecS[k] += (L[j] - (tmp * tmp)) / colSums[j];
// }
// }
// }
// for (k = 0; k < nn; ++k) {
// vecS[k] *= vecK[k] * vecD[k];
// }
// }
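For reference, the removed `cost` routine above reduces to a few lines of R when written against the full kernel matrix; a sketch (names illustrative; `K` is the n x n kernel matrix with unit diagonal, `Y` the response):

cost_simple <- function(K, Y) {
    cs <- colSums(K)
    y1 <- as.vector(K %*% Y) / cs    # weighted first moments of Y
    y2 <- as.vector(K %*% Y^2) / cs  # weighted second moments of Y
    mean(y2 - y1^2)                  # average of the per-sample variances
}
cost_weighted <- function(K, Y) {
    cs <- colSums(K)
    y1 <- as.vector(K %*% Y) / cs
    L  <- as.vector(K %*% Y^2) / cs - y1^2
    sum((cs - 1) * L) / (sum(cs) - length(Y))  # see the division-by-zero TODO
}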

View File

@ -17,8 +17,8 @@ static const R_CallMethodDef CallEntries[] = {
{NULL, NULL, 0} {NULL, NULL, 0}
}; };
/* Restrict C entrypoints to registered routines. */ /* Restrict C entry points to registered routines. */
void R_initCVE(DllInfo *dll) { void R_init_CVE(DllInfo *dll) {
R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
R_useDynamicSymbols(dll, FALSE); R_useDynamicSymbols(dll, FALSE);
} }

View File

@ -914,7 +914,7 @@ mat* laplace(mat *A, double *workMem) {
* \_____/ \_____/ * \_____/ \_____/
* IpA C = ImA B * IpA C = ImA B
* \_______/ * \_______/
* IpA C = Y ==> C = IpA^-1 Y * IpA C = Y ==> C = IpA^-1 Y
* *
* @param A Skew-Symmetric matrix of dimension `(n, n)`. * @param A Skew-Symmetric matrix of dimension `(n, n)`.
* @param B Matrix of dimensions `(n, m)` with `m <= n`. * @param B Matrix of dimensions `(n, m)` with `m <= n`.

View File

@ -1,193 +0,0 @@
#' Multivariate Normal Distribution.
#'
#' Random generation for the multivariate normal distribution.
#'
#' @param n number of samples.
#' @param mu mean vector.
#' @param sigma covariance matrix.
#'
#' @returns a \eqn{n\times p} matrix with samples in its rows.
#'
#' @examples
#' rmvnorm(20, sigma = matrix(c(2, 1, 1, 2), 2))
#' rmvnorm(20, mu = c(3, -1, 2))
rmvnorm <- function(n = 1, mu = rep(0, p), sigma = diag(p)) {
if (!missing(sigma)) {
p <- nrow(sigma)
} else if (!missing(mu)) {
mu <- matrix(mu, ncol = 1)
p <- nrow(mu)
} else {
stop("At least one of 'mu' or 'sigma' must be supplied.")
}
# See: https://en.wikipedia.org/wiki/Multivariate_normal_distribution
return(rep(mu, each = n) + matrix(rnorm(n * p), n) %*% chol(sigma))
}
#' Samples from the multivariate t distribution (student distribution).
#'
#' @param n number of samples.
#' @param mu mean vector.
#' @param sigma a \eqn{k\times k} positive definite matrix. If the degrees of
#' freedom \eqn{\nu} are bigger than 2, the resulting covariance is
#' \deqn{var(x) = \Sigma\frac{\nu}{\nu - 2}}
#' for \eqn{\nu > 2}.
#' @param df degrees of freedom \eqn{\nu}.
#'
#' @returns a \eqn{n\times p} matrix with samples in its rows.
#'
#' @examples
#' rmvt(20, c(0, 1), matrix(c(3, 1, 1, 2), 2), 3)
#' rmvt(20, sigma = matrix(c(2, 1, 1, 2), 2), 3)
#' rmvt(20, mu = c(3, -1, 2), 3)
rmvt <- function(n = 1, mu = rep(0, p), sigma = diag(p), df = Inf) {
if (!missing(sigma)) {
p <- nrow(sigma)
} else if (!missing(mu)) {
mu <- matrix(mu, ncol = 1)
p <- nrow(mu)
} else {
stop("At least one of 'mu' or 'sigma' must be supplied.")
}
if (df == Inf) {
Z <- 1
} else {
Z <- sqrt(df / rchisq(n, df))
}
return(rmvnorm(n, sigma = sigma) * Z + rep(mu, each = n))
}
#' Generalized Normal Distribution.
#' see: https://en.wikipedia.org/wiki/Generalized_normal_distribution
rgnorm <- function(n = 1, mu = 0, alpha = 1, beta = 1) {
if (alpha <= 0 || beta <= 0) {
stop("alpha and beta must be positive.")
}
lambda <- (1 / alpha)^beta
scales <- qgamma(runif(n), shape = 1 / beta, scale = 1 / lambda)^(1 / beta)
return(scales * ((-1)^rbinom(n, 1, 0.5)) + mu)
}
#' Laplace distribution
#' see: https://en.wikipedia.org/wiki/Laplace_distribution
rlaplace <- function(n = 1, mu = 0, sigma = 1) {
U <- runif(n, -0.5, 0.5)
scale <- sigma / sqrt(2)
return(mu - scale * sign(U) * log(1 - 2 * abs(U)))
}
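With this parametrization the scale is sigma / sqrt(2), so `sigma` plays the role of the standard deviation; a quick moment check:

set.seed(1)
x <- rlaplace(100000, mu = 2, sigma = 3)
round(c(mean(x), sd(x)), 1)  # approximately c(2, 3)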
#' Generates test datasets.
#'
#' Provides sample datasets. There are seven different datasets named
#' M1 through M7, described in the paper referenced below.
#' The general model is given by:
#' \deqn{Y = g(B'X) + \epsilon}
#'
#' @param name one of \code{"M1"}, ..., \code{"M7"}.
#' @param n number of samples (with a dataset-specific default if omitted).
#' @param p dimension of the random variable \code{X}.
#' @param sigma noise standard deviation; for some models also the base of
#'  the covariance \eqn{\Sigma_{ij} = \sigma^{|i - j|}}{Sigma_ij = sigma^|i - j|}.
#' @param ... currently unused.
#'
#' @return List with elements
#' \describe{
#'    \item{X}{data}
#'    \item{Y}{response}
#'    \item{B}{used dimension reduction matrix}
#'    \item{name}{name of the dataset (name parameter)}
#' }
#'
#' @section M1:
#' \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with
#' \eqn{\Sigma_{ij} = \sigma^{|i - j|}}{Sigma_ij = sigma^|i - j|},
#' \eqn{k = 1} and a default of \eqn{n = 100} data points; the link is
#' \deqn{g(x) = cos(x)} with generalized normal noise of variance \eqn{1/4}.
#' @section M2:
#' \eqn{X} is a standard normal mixture shifted by \eqn{\pm 1} (mixing
#' probability 0.3), \eqn{k = 1}, default \eqn{n = 100}; the link is
#' \deqn{g(x) = cos(x)} with \eqn{N(0, \sigma^2)}{N(0, sigma^2)} noise.
#' @section M3:
#' \eqn{X\sim N_p(0, I_p)}{X ~ N_p(0, I_p)}, \eqn{k = 1}, default
#' \eqn{n = 200}; \deqn{g(x) = 1.5 log(2 + |x|)}
#' @section M4:
#' \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} as in M1, \eqn{k = 2},
#' default \eqn{n = 200};
#' \deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2}}{g(x) = x_1 / (0.5 + (x_2 + 1.5)^2)}
#' @section M5:
#' \eqn{X} uniform on \eqn{[0, 1]^p}, \eqn{k = 2}, default \eqn{n = 200};
#' \deqn{g(x) = cos(\pi x_1)(x_2 + 1)^2}{g(x) = cos(pi x_1)(x_2 + 1)^2}
#' @section M6:
#' \eqn{X\sim N_p(0, I_p)}{X ~ N_p(0, I_p)}, \eqn{k = 3}, default
#' \eqn{n = 200}; \deqn{g(x) = x_1^2 + x_2^2 + x_3^2}
#' @section M7:
#' \eqn{X} multivariate \eqn{t_3}, \eqn{k = 4}, default \eqn{n = 400};
#' \deqn{g(x) = x_1 x_2^2 + x_3 x_4} with Laplace noise.
#'
#' @import stats
#' @importFrom stats rnorm rbinom
#' @export
dataset <- function(name = "M1", n = NULL, p = 20, sigma = 0.5, ...) {
name <- toupper(name)
if (nchar(name) == 1) { name <- paste0("M", name) }
if (name == "M1") {
if (missing(n)) { n <- 100 }
# B ... `p x 1`
B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1)
X <- rmvnorm(n, sigma = sigma^abs(outer(1:p, 1:p, FUN = `-`)))
beta <- 0.5
Y <- cos(X %*% B) + rgnorm(n, 0,
alpha = sqrt(0.25 * gamma(1 / beta) / gamma(3 / beta)),
beta = beta
)
} else if (name == "M2") {
if (missing(n)) { n <- 100 }
prob <- 0.3
lambda <- 1 # dispersion
# B ... `p x 1`
B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1)
Z <- 2 * rbinom(n, 1, prob) - 1
X <- matrix(rep(lambda * Z, p) + rnorm(n * p), n)
Y <- cos(X %*% B) + rnorm(n, 0, sigma)
} else if (name == "M3") {
if (missing(n)) { n <- 200 }
# B ... `p x 1`
B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1)
X <- matrix(rnorm(n * p), n)
Y <- 1.5 * log(2 + abs(X %*% B)) + rnorm(n, 0, sigma^2)
} else if (name == "M4") {
if (missing(n)) { n <- 200 }
# B ... `p x 2`
B <- cbind(
c(rep(1 / sqrt(6), 6), rep(0, p - 6)),
c(rep(c(1, -1), 3) / sqrt(6), rep(0, p - 6))
)
X <- rmvnorm(n, sigma = sigma^abs(outer(1:p, 1:p, FUN = `-`)))
XB <- X %*% B
Y <- (XB[, 1]) / (0.5 + (XB[, 2] + 1.5)^2) + rnorm(n, 0, sigma^2)
} else if (name == "M5") {
if (missing(n)) { n <- 200 }
# B ... `p x 2`
B <- cbind(
c(rep(1, 6), rep(0, p - 6)),
c(rep(c(1, -1), 3), rep(0, p - 6))
) / sqrt(6)
X <- matrix(runif(n * p), n)
XB <- X %*% B
Y <- cos(XB[, 1] * pi) * (XB[, 2] + 1)^2 + rnorm(n, 0, sigma^2)
} else if (name == "M6") {
if (missing(n)) { n <- 200 }
# B ... `p x 3`
B <- diag(p)[, -(3:(p - 1))]
X <- matrix(rnorm(n * p), n)
Y <- rowSums((X %*% B)^2) + rnorm(n, 0, sigma^2)
} else if (name == "M7") {
if (missing(n)) { n <- 400 }
# B ... `p x 4`
B <- diag(p)[, -(4:(p - 1))]
# "R"andom "M"ulti"V"ariate "S"tudent
X <- rmvt(n = n, sigma = diag(p), df = 3)
XB <- X %*% B
Y <- (XB[, 1]) * (XB[, 2])^2 + (XB[, 3]) * (XB[, 4])
Y <- Y + rlaplace(n, 0, sigma)
} else {
stop("Got unknown dataset name.")
}
return(list(X = X, Y = Y, B = B, name = name))
}
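A short usage sketch for the generator above (output shapes follow from the code; default p = 20):

ds <- dataset("M4", n = 100)
dim(ds$X)  # 100 x 20
dim(ds$B)  # 20 x 2, i.e. k = 2
ds$name    # "M4"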

View File

@ -1,82 +0,0 @@
#' Draws a sample from the invariant measure on the Stiefel manifold \eqn{S(p, q)}.
#'
#' @param p row dimension
#' @param q col dimension
#' @return \code{p} times \code{q} semi-orthogonal matrix.
#' @examples
#' V <- rStiefel(6, 4)
#' @export
rStiefel <- function(p, q) {
return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))))
}
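The defining property is easy to verify numerically: the columns of a draw are orthonormal.

V <- rStiefel(6, 4)
round(crossprod(V), 12)  # t(V) %*% V is the 4 x 4 identity up to rounding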
#' Retraction to the manifold.
#'
#' @param A matrix.
#' @return `(p, q)` semi-orthogonal matrix, aka element of the Stiefel manifold.
#' @keywords internal
#' @export
retractStiefel <- function(A) {
return(qr.Q(qr(A)))
}
#' Skew-Symmetric matrix computed from `A` as
#' \eqn{1/2 (A - A^T)}.
#' @param A Matrix of dim `(p, q)`
#' @return Skew-Symmetric matrix of dim `(p, p)`.
#' @keywords internal
#' @export
skew <- function(A) {
0.5 * (A - t(A))
}
#' Symmetric matrix computed from `A` as
#' \eqn{1/2 (A + A^T)}.
#' @param A Matrix of dim `(p, q)`
#' @return Symmetric matrix of dim `(p, p)`.
#' @keywords internal
#' @export
sym <- function(A) {
0.5 * (A + t(A))
}
#' Orthogonal projection onto the tangent space of the Stiefel manifold.
#'
#' @param V Point on the Stiefel manifold.
#' @param G matrix to be projected onto the tangent space at `V`.
#' @return `(p, q)` matrix as element of the tangent space at `V`.
#' @keywords internal
#' @export
projTangentStiefel <- function(V, G) {
Q <- diag(1, nrow(V)) - V %*% t(V)
return(Q %*% G + V %*% skew(t(V) %*% G))
}
#' Null space basis of given matrix `V`
#'
#' @param V `(p, q)` matrix
#' @return Semi-orthogonal `(p, p - q)` matrix spanning the null space of `V`.
#' @keywords internal
#' @export
null <- function(V) {
tmp <- qr(V)
set <- if(tmp$rank == 0L) seq_len(ncol(V)) else -seq_len(tmp$rank)
return(qr.Q(tmp, complete = TRUE)[, set, drop = FALSE])
}
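And a matching check for `null`, assuming the `rStiefel` defined above:

V <- rStiefel(6, 2)
N <- null(V)
dim(N)                      # 6 x 4
round(crossprod(V, N), 12)  # 2 x 4 zero matrix, i.e. N spans the complement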
#' Creates a (numeric) matrix where each column contains
#' an element to element matching.
#' @param elements numeric vector of elements to match
#' @return matrix of size `(2, n * (n - 1) / 2)` for an argument of length `n`.
#' @keywords internal
#' @examples
#' elem.pairs(seq.int(2, 5))
#' @export
elem.pairs <- function(elements) {
# Number of elements to match.
n <- length(elements)
# Create all combinations.
pairs <- rbind(rep(elements, each=n), rep(elements, n))
# Select unique combinations without self interaction.
return(pairs[, pairs[1, ] < pairs[2, ]])
}
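The construction coincides with `utils::combn` over the same elements, which gives a quick cross-check:

all.equal(elem.pairs(seq.int(2, 5)), combn(seq.int(2, 5), 2))
# TRUE: columns (2,3), (2,4), (2,5), (3,4), (3,5), (4,5)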

View File

@ -1,68 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/datasets.R
\name{dataset}
\alias{dataset}
\title{Generates test datasets.}
\usage{
dataset(name = "M1", n, B, p.mix = 0.3, lambda = 1)
}
\arguments{
\item{name}{One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"} or \code{"M5"}}
\item{n}{number of samples}
\item{B}{SDR basis used for dataset creation if supplied.}
\item{p.mix}{Only for \code{"M4"}, see: below.}
\item{lambda}{Only for \code{"M4"}, see: below.}
\item{p}{Dim. of random variable \code{X}.}
}
\value{
List with elements
\describe{
\item{X}{data}
\item{Y}{response}
\item{B}{Used dim-reduction matrix}
\item{name}{Name of the dataset (name parameter)}
}
}
\description{
Provides sample datasets. There are 5 different datasets named
M1, M2, M3, M4 and M5 described in the paper references below.
The general model is given by:
\deqn{Y = g(B'X) + \epsilon}
}
\section{M1}{
The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace
dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points.
The link function \eqn{g} is given as
\deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + \epsilon / 2}{%
g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + epsilon / 2}
}
\section{M2}{
\eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a
default of \eqn{n = 200} data points.
The link function \eqn{g} is given as
\deqn{g(x) = (b_1^T X) (b_2^T X)^2 + \epsilon / 2}
}
\section{M3}{
\deqn{g(x) = cos(b_1^T X) + \epsilon / 2}
}
\section{M4}{
\eqn{X} is a two-component standard normal mixture, shifted by
\eqn{\pm\lambda} with mixing probability \code{p.mix}, \eqn{k = 1} and a
default of \eqn{n = 100} data points; the link is the same as for M3.
}
\section{M5}{
\eqn{X\sim N_p(0, I_p)}{X ~ N_p(0, I_p)} with \eqn{k = 1} and a default of
\eqn{n = 42} data points. The link function \eqn{g} is given as
\deqn{g(x) = 2 log(|x_1| + 1) + \epsilon / 2}
}

View File

@ -1,23 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/util.R
\name{elem.pairs}
\alias{elem.pairs}
\title{Creates a (numeric) matrix where each column contains
an element to element matching.}
\usage{
elem.pairs(elements)
}
\arguments{
\item{elements}{numeric vector of elements to match}
}
\value{
matrix of size `(2, n * (n - 1) / 2)` for an argument of length `n`.
}
\description{
Creates a (numeric) matrix where each column contains
an element to element matching.
}
\examples{
elem.pairs(seq.int(2, 5))
}
\keyword{internal}

View File

@ -1,20 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/util.R
\name{projTangentStiefel}
\alias{projTangentStiefel}
\title{Orthogonal projection onto the tangent space of the Stiefel manifold.}
\usage{
projTangentStiefel(V, G)
}
\arguments{
\item{V}{Point on the Stiefel manifold.}
\item{G}{matrix to be projected onto the tangent space at `V`.}
}
\value{
`(p, q)` matrix as element of the tangent space at `V`.
}
\description{
Orthogonal projection onto the tangent space of the Stiefel manifold.
}
\keyword{internal}

View File

@ -1,18 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/util.R
\name{retractStiefel}
\alias{retractStiefel}
\title{Retraction to the manifold.}
\usage{
retractStiefel(A)
}
\arguments{
\item{A}{matrix.}
}
\value{
`(p, q)` semi-orthogonal matrix, aka element of the Stiefel manifold.
}
\description{
Retraction to the manifold.
}
\keyword{internal}

View File

@ -1,20 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/util.R
\name{skew}
\alias{skew}
\title{Skew-Symmetric matrix computed from `A` as
\eqn{1/2 (A - A^T)}.}
\usage{
skew(A)
}
\arguments{
\item{A}{Matrix of dim `(p, q)`}
}
\value{
Skew-Symmetric matrix of dim `(p, p)`.
}
\description{
Skew-Symmetric matrix computed from `A` as
\eqn{1/2 (A - A^T)}.
}
\keyword{internal}

View File

@ -1,20 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/util.R
\name{sym}
\alias{sym}
\title{Symmetric matrix computed from `A` as
\eqn{1/2 (A + A^T)}.}
\usage{
sym(A)
}
\arguments{
\item{A}{Matrix of dim `(p, q)`}
}
\value{
Symmetric matrix of dim `(p, p)`.
}
\description{
Symmetric matrix computed from `A` as
\eqn{1/2 (A + A^T)}.
}
\keyword{internal}

View File

@ -1,11 +0,0 @@
Package: CVEpureR
Type: Package
Title: Conditional Variance Estimator for Sufficient Dimension Reduction
Version: 0.1
Date: 2019-08-29
Author: Loki
Maintainer: Loki <loki@no.mail>
Description: Implementation of the Conditional Variance Estimation (CVE) method. This package version is written in pure R.
License: GPL-3
Encoding: UTF-8
RoxygenNote: 6.1.1

View File

@ -1,23 +0,0 @@
# Generated by roxygen2: do not edit by hand
S3method(plot,cve)
S3method(summary,cve)
export(cve)
export(cve.call)
export(cve.grid.search)
export(cve_linesearch)
export(cve_sgd)
export(cve_simple)
export(dataset)
export(elem.pairs)
export(estimate.bandwidth)
export(grad)
export(null)
export(rStiefl)
import(stats)
importFrom(graphics,lines)
importFrom(graphics,plot)
importFrom(graphics,points)
importFrom(stats,model.frame)
importFrom(stats,rbinom)
importFrom(stats,rnorm)

View File

@ -1,265 +0,0 @@
#' Conditional Variance Estimator (CVE)
#'
#' Conditional Variance Estimator for Sufficient Dimension
#' Reduction
#'
#' TODO: And some details
#'
#'
#' @references Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
#'
#' @docType package
#' @author Loki
#' @useDynLib CVE, .registration = TRUE
"_PACKAGE"
#' Implementation of the CVE method.
#'
#' Conditional Variance Estimator (CVE) is a novel sufficient dimension
#' reduction (SDR) method assuming a model
#' \deqn{Y \sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon}
#' where B'X is a lower dimensional projection of the predictors.
#'
#' @param formula formula for the regression model defining `X`, `Y`.
#' See: \code{\link{formula}}.
#' @param data data.frame holding data for formula.
#' @param method The methods differ only in the optimization used.
#'  All of them are gradient-based optimizations on a Stiefel manifold.
#' \itemize{
#'      \item "simple" simple reduction of step size.
#'      \item "sgd" stochastic gradient descent.
#' \item TODO: further
#' }
#' @param ... Further parameters depending on the used method.
#' @examples
#' library(CVE)
#'
#' # sample dataset
#' ds <- dataset("M5")
#'
#' # call `cve` with default method (aka "simple")
#' dr.simple <- cve(ds$Y ~ ds$X, k = ncol(ds$B))
#' # plot optimization history (loss via iteration)
#' plot(dr.simple, main = "CVE M5 simple")
#'
#' # call `cve` with method "linesearch" using `data.frame` as data.
#' data <- data.frame(Y = ds$Y, X = ds$X)
#' # Note: `Y, X` are NOT defined, they are extracted from `data`.
#' dr.linesearch <- cve(Y ~ ., data, method = "linesearch", k = ncol(ds$B))
#' plot(dr.linesearch, main = "CVE M5 linesearch")
#'
#' @references Fertl L., Bura E. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
#'
#' @seealso \code{\link{formula}}. For a complete parameters list (dependent on
#' the method) see \code{\link{cve_simple}}, \code{\link{cve_sgd}}
#' @import stats
#' @importFrom stats model.frame
#' @export
cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
# check for type of `data` if supplied and set default
if (missing(data)) {
data <- environment(formula)
} else if (!is.data.frame(data)) {
stop("Parameter 'data' must be a 'data.frame' or missing.")
}
# extract `X`, `Y` from `formula` with `data`
model <- stats::model.frame(formula, data)
X <- as.matrix(model[ ,-1L, drop = FALSE])
Y <- as.double(model[ , 1L])
# pass extracted data on to [cve.call()]
dr <- cve.call(X, Y, method = method, max.dim = max.dim, ...)
# overwrite `call` property from [cve.call()]
dr$call <- match.call()
return(dr)
}
#' @param X data matrix.
#' @param Y response vector.
#' @param nObs parameter for the bandwidth estimation, as described in the paper.
#' @param k guess for the SDR dimension.
#' @param ... method-specific parameters.
#' @rdname cve
#' @export
cve.call <- function(X, Y, method = "simple",
nObs = sqrt(nrow(X)), h = NULL,
min.dim = 1L, max.dim = 10L, k = NULL,
tau = 1.0, tol = 1e-3,
epochs = 50L, attempts = 10L,
logger = NULL) {
# parameter checking
if (!(is.matrix(X) && is.numeric(X))) {
stop("Parameter 'X' should be a numeric matrices.")
}
if (!is.numeric(Y)) {
stop("Parameter 'Y' must be numeric.")
}
if (is.matrix(Y) || !is.double(Y)) {
Y <- as.double(Y)
}
if (nrow(X) != length(Y)) {
stop("Rows of 'X' and 'Y' elements are not compatible.")
}
if (ncol(X) < 2) {
stop("'X' is one dimensional, no need for dimension reduction.")
}
if (missing(k) || is.null(k)) {
min.dim <- as.integer(min.dim)
max.dim <- as.integer(min(max.dim, ncol(X) - 1L))
} else {
min.dim <- as.integer(k)
max.dim <- as.integer(k)
}
if (min.dim > max.dim) {
stop("'min.dim' bigger 'max.dim'.")
}
if (max.dim >= ncol(X)) {
stop("'max.dim' (or 'k') must be smaller than 'ncol(X)'.")
}
if (is.function(h)) {
estimate.bandwidth <- h
h <- NULL
}
if (!is.numeric(tau) || length(tau) > 1L || tau <= 0.0) {
stop("Initial step-width 'tau' must be positive number.")
} else {
tau <- as.double(tau)
}
if (!is.numeric(tol) || length(tol) > 1L || tol < 0.0) {
stop("Break condition tolerance 'tol' must be not negative number.")
} else {
tol <- as.double(tol)
}
if (!is.numeric(epochs) || length(epochs) > 1L) {
stop("Parameter 'epochs' must be positive integer.")
} else if (!is.integer(epochs)) {
epochs <- as.integer(epochs)
}
if (epochs < 1L) {
stop("Parameter 'epochs' must be at least 1L.")
}
if (!is.numeric(attempts) || length(attempts) > 1L) {
stop("Parameter 'attempts' must be positive integer.")
} else if (!is.integer(attempts)) {
attempts <- as.integer(attempts)
}
if (attempts < 1L) {
stop("Parameter 'attempts' must be at least 1L.")
}
if (is.function(logger)) {
loggerEnv <- environment(logger)
} else {
loggerEnv <- NULL
}
# Call specified method.
method <- tolower(method)
call <- match.call()
dr <- list()
for (k in min.dim:max.dim) {
if (missing(h) || is.null(h)) {
# Estimate for each `k` separately; do not overwrite `h` itself,
# otherwise later dimensions would silently reuse the first estimate.
h.k <- estimate.bandwidth(X, k, nObs)
} else if (is.numeric(h) && h > 0.0) {
h.k <- as.double(h)
} else {
stop("Bandwidth 'h' must be a positive numeric.")
}
if (method == 'simple') {
dr.k <- .Call('cve_simple', PACKAGE = 'CVE',
X, Y, k, h.k,
tau, tol,
epochs, attempts,
logger, loggerEnv)
# dr.k <- cve_simple(X, Y, k, nObs = nObs, ...)
# } else if (method == 'linesearch') {
# dr.k <- cve_linesearch(X, Y, k, nObs = nObs, ...)
# } else if (method == 'rcg') {
# dr.k <- cve_rcg(X, Y, k, nObs = nObs, ...)
# } else if (method == 'momentum') {
# dr.k <- cve_momentum(X, Y, k, nObs = nObs, ...)
# } else if (method == 'rmsprob') {
# dr.k <- cve_rmsprob(X, Y, k, nObs = nObs, ...)
# } else if (method == 'sgdrmsprob') {
# dr.k <- cve_sgdrmsprob(X, Y, k, nObs = nObs, ...)
# } else if (method == 'sgd') {
# dr.k <- cve_sgd(X, Y, k, nObs = nObs, ...)
} else {
stop('Got unknown method.')
}
dr.k$B <- null(dr.k$V)
dr.k$loss <- mean(dr.k$L)
dr.k$h <- h.k
dr.k$k <- k
class(dr.k) <- "cve.k"
dr[[k]] <- dr.k
}
# augment result information
dr$method <- method
dr$call <- call
class(dr) <- "cve"
return(dr)
}
#' Plotting helper for objects of class \code{cve}.
#'
#' @param x Object of class \code{cve} (result of [cve()]).
#' @param content Specifies what to plot:
#' \itemize{
#' \item "history" Plots the loss history from stiefel optimization
#' (default).
#' \item ... TODO: add (if there are any)
#' }
#' @param ... Pass through parameters to [plot()] and [lines()]
#'
#' @usage ## S3 method for class 'cve'
#' plot(x, content = "history", ...)
#' @seealso see \code{\link{par}} for graphical parameters to pass through
#' as well as \code{\link{plot}} for standard plot utility.
#' @importFrom graphics plot lines points
#' @method plot cve
#' @export
plot.cve <- function(x, ...) {
L <- c()
k <- c()
for (dr.k in x) {
if (inherits(dr.k, 'cve.k')) {
k <- c(k, paste0(dr.k$k))
L <- c(L, dr.k$L)
}
}
L <- matrix(L, ncol = length(k))
boxplot(L, main = "Loss ...",
xlab = "SDR dimension k",
ylab = expression(L(V, X[i])),
names = k)
}
#' Prints a summary of a \code{cve} result.
#' @param object Instance of 'cve' as returned by \code{cve}.
#' @method summary cve
#' @export
summary.cve <- function(object, ...) {
cat('Summary of CVE result - Method: "', object$method, '"\n',
'\n',
'Dataset size: ', nrow(object$X), '\n',
'Data Dimension: ', ncol(object$X), '\n',
'SDR Dimension: ', object$k, '\n',
'loss: ', object$loss, '\n',
'\n',
'Called via:\n',
' ',
sep='')
print(object$call)
}

View File

@ -1,169 +0,0 @@
#' Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe
#' conditions.
#'
#' @keywords internal
#' @export
cve_linesearch <- function(X, Y, k,
nObs = sqrt(nrow(X)),
h = NULL,
tau = 1.0,
tol = 1e-3,
rho1 = 0.1,
rho2 = 0.9,
slack = 0,
epochs = 50L,
attempts = 10L,
max.linesearch.iter = 10L,
logger = NULL
) {
# Set the environment of `grad` so that it can find, and manipulate,
# this function's local variables.
environment(grad) <- environment()
# Get dimensions.
n <- nrow(X)
p <- ncol(X)
q <- p - k
# Save initial learning rate `tau`.
tau.init <- tau
# Adapt tolerance for the break condition.
tol <- sqrt(2 * q) * tol
# Estimate bandwidth if not given.
if (missing(h) || !is.numeric(h)) {
h <- estimate.bandwidth(X, k, nObs)
}
# Compute persistent data.
# Compute lookup indices for symmetry, lower/upper
# triangular parts and vectorization.
pair.index <- elem.pairs(seq(n))
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
# Matrix of vectorized indices. (vec(index) -> seq)
index <- matrix(seq(n * n), n, n)
lower <- index[lower.tri(index)]
upper <- t(index)[lower]
# Create all pairwise differences of rows of `X`.
X_diff <- X[i, , drop = F] - X[j, , drop = F]
# Identity matrix.
I_p <- diag(1, p)
# Init tracking of current best (according multiple attempts).
V.best <- NULL
loss.best <- Inf
# Start loop for multiple attempts.
for (attempt in 1:attempts) {
# Sample a `(p, q)` dimensional matrix from the stiefel manifold as
# optimization start value.
V <- rStiefl(p, q)
# Initial loss and gradient.
loss <- Inf
G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
# Set last loss (aka, loss after applying the step).
loss.last <- loss
# Call logger with initial values before starting optimization.
if (is.function(logger)) {
epoch <- 0 # Set epoch count to 0 (only relevant for logging).
error <- NA
logger(environment())
}
## Start optimization loop.
for (epoch in 1:epochs) {
# Cayley transform matrix `A`
A <- (G %*% t(V)) - (V %*% t(G))
# Directional derivative of the loss at current position, given
# as `Tr(G^T \cdot A \cdot V)`.
loss.prime <- -0.5 * norm(A, type = 'F')^2
# Linesearch
tau.upper <- Inf
tau.lower <- 0
tau <- tau.init
for (iter in 1:max.linesearch.iter) {
# Apply learning rate `tau`.
A.tau <- (tau / 2) * A
# Parallel transport (on Stiefel manifold) into direction of `G`.
inv <- solve(I_p + A.tau)
V.tau <- inv %*% ((I_p - A.tau) %*% V)
# Loss at position after a step.
loss <- Inf # aka loss.tau
G.tau <- grad(X, Y, V.tau, h, loss.out = TRUE, persistent = TRUE)
# Armijo condition.
if (loss > loss.last + (rho1 * tau * loss.prime)) {
tau.upper <- tau
tau <- (tau.lower + tau.upper) / 2
next()
}
V.prime.tau <- -0.5 * inv %*% A %*% (V + V.tau)
loss.prime.tau <- sum(G * V.prime.tau) # Tr(grad(tau)^T \cdot Y^'(tau))
# Wolfe condition.
if (loss.prime.tau < rho2 * loss.prime) {
tau.lower <- tau
if (tau.upper == Inf) {
tau <- 2 * tau.lower
} else {
tau <- (tau.lower + tau.upper) / 2
}
} else {
break()
}
}
# Compute error.
error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
# Check break condition (epoch check to skip ignored gradient calc).
# Note: the division by `sqrt(2 * q)` is included in `tol`.
if (error < tol || epoch >= epochs) {
# take last step and stop optimization.
V <- V.tau
# Final call to the logger before stopping optimization
if (is.function(logger)) {
G <- G.tau
logger(environment())
}
break()
}
# Perform the step and remember previous loss.
V <- V.tau
loss.last <- loss
G <- G.tau
# Log after taking current step.
if (is.function(logger)) {
logger(environment())
}
}
# Check if current attempt improved previous ones
if (loss < loss.best) {
loss.best <- loss
V.best <- V
}
}
return(list(
loss = loss.best,
V = V.best,
B = null(V.best),
h = h
))
}
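The curvilinear step above relies on the Cayley transform: for skew-symmetric `A`, `(I + tau/2 A)^{-1} (I - tau/2 A)` is orthogonal, so `V.tau` stays on the Stiefel manifold. A small standalone check (all names local to this sketch):

p <- 6; q <- 2; tau <- 0.5
V <- qr.Q(qr(matrix(rnorm(p * q), p, q)))  # start on the manifold
G <- matrix(rnorm(p * q), p, q)            # stand-in for a gradient
A <- (G %*% t(V)) - (V %*% t(G))           # skew-symmetric by construction
A.tau <- (tau / 2) * A
V.tau <- solve(diag(p) + A.tau) %*% ((diag(p) - A.tau) %*% V)
round(crossprod(V.tau), 12)                # identity: still semi-orthogonal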

View File

@ -1,139 +0,0 @@
#' Implementation of the CVE method as gradient descent with momentum on the Stiefel manifold.
#'
#' @references A Riemannian Conjugate Gradient Algorithm with Implicit Vector
#' Transport for Optimization on the Stiefel Manifold
#' @keywords internal
#' @export
cve_momentum <- function(X, Y, k,
nObs = sqrt(nrow(X)),
h = NULL,
tau = 1.0,
tol = 1e-4,
rho = 0.1, # Momentum update.
slack = 0,
epochs = 50L,
attempts = 10L,
logger = NULL
) {
# Set the environment of `grad` so that it can find, and manipulate,
# this function's local variables.
environment(grad) <- environment()
# Get dimensions.
n <- nrow(X) # Number of samples.
p <- ncol(X) # Data dimensions
q <- p - k # Complement dimension of the SDR space.
# Save initial learning rate `tau`.
tau.init <- tau
# Adapt tolerance for the break condition.
tol <- sqrt(2 * q) * tol
# Estimate bandwidth if not given.
if (missing(h) || !is.numeric(h)) {
h <- estimate.bandwidth(X, k, nObs)
}
# Compute persistent data.
# Compute lookup indices for symmetry, lower/upper
# triangular parts and vectorization.
pair.index <- elem.pairs(seq(n))
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
# Index of vectorized matrix, for lower and upper triangular part.
lower <- ((i - 1) * n) + j
upper <- ((j - 1) * n) + i
# Create all pairwise differences of rows of `X`.
X_diff <- X[i, , drop = F] - X[j, , drop = F]
# Identity matrix.
I_p <- diag(1, p)
# Init tracking of current best (according multiple attempts).
V.best <- NULL
loss.best <- Inf
# Start loop for multiple attempts.
for (attempt in 1:attempts) {
# Reset learning rate `tau`.
tau <- tau.init
# Sample a `(p, q)` dimensional matrix from the stiefel manifold as
# optimization start value.
V <- rStiefl(p, q)
# Initial loss and gradient.
loss <- Inf
G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
# Set last loss (aka, loss after applying the step).
loss.last <- loss
# Call logger with initial values before starting optimization.
if (is.function(logger)) {
epoch <- 0 # Set epoch count to 0 (only relevant for logging).
error <- NA
logger(environment())
}
M <- matrix(0, p, q)
## Start optimization loop.
for (epoch in 1:epochs) {
# Apply learning rate `tau`.
A <- projTangentStiefl(V, G)
# Momentum update.
M <- A + rho * projTangentStiefl(V, M)
# Parallel transport (on Stiefel manifold) into direction of `G`.
V.tau <- retractStiefl(V - tau * M)
# Loss at position after a step.
loss <- grad(X, Y, V.tau, h, loss.only = TRUE, persistent = TRUE)
# Check if step is appropriate, iff not reduce learning rate.
if ((loss - loss.last) > slack * loss.last) {
tau <- tau / 2
next() # Keep position and try with smaller `tau`.
}
# Compute error.
error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
# Check break condition (epoch check to skip ignored gradient calc).
# Note: the division by `sqrt(2 * q)` is included in `tol`.
if (error < tol || epoch >= epochs) {
# take last step and stop optimization.
V <- V.tau
# Call the logger one last time before stopping.
if (is.function(logger)) {
logger(environment())
}
break()
}
# Perform the step and remember previous loss.
V <- V.tau
loss.last <- loss
# Call logger after taking a step.
if (is.function(logger)) {
logger(environment())
}
# Compute gradient at new position.
G <- grad(X, Y, V, h, persistent = TRUE)
}
# Check if current attempt improved previous ones
if (loss < loss.best) {
loss.best <- loss
V.best <- V
}
}
return(list(
loss = loss.best,
V = V.best,
B = null(V.best),
h = h
))
}

View File

@ -1,179 +0,0 @@
#' Implementation of the CVE method as a Riemannian conjugate gradient method.
#'
#' @references A Riemannian Conjugate Gradient Algorithm with Implicit Vector
#' Transport for Optimization on the Stiefel Manifold
#' @keywords internal
#' @export
cve_rcg <- function(X, Y, k,
nObs = sqrt(nrow(X)),
h = NULL,
tau = 1.0,
tol = 1e-4,
rho = 1e-4, # For Armijo condition.
slack = 0,
epochs = 50L,
attempts = 10L,
max.linesearch.iter = 20L,
logger = NULL
) {
# Set the environment of `grad` so that it can find, and manipulate,
# this function's local variables.
environment(grad) <- environment()
# Get dimensions.
n <- nrow(X) # Number of samples.
p <- ncol(X) # Data dimensions
q <- p - k # Complement dimension of the SDR space.
# Save initial learning rate `tau`.
tau.init <- tau
# Adapt tolerance for the break condition.
tol <- sqrt(2 * q) * tol
# Estimate bandwidth if not given.
if (missing(h) || !is.numeric(h)) {
h <- estimate.bandwidth(X, k, nObs)
}
# Compute persistent data.
# Compute lookup indices for symmetry, lower/upper
# triangular parts and vectorization.
pair.index <- elem.pairs(seq(n))
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
# Index of vectorized matrix, for lower and upper triangular part.
lower <- ((i - 1) * n) + j
upper <- ((j - 1) * n) + i
# Create all pairwise differences of rows of `X`.
X_diff <- X[i, , drop = F] - X[j, , drop = F]
# Identity matrix.
I_p <- diag(1, p)
# Init tracking of current best (according multiple attempts).
V.best <- NULL
loss.best <- Inf
# Start loop for multiple attempts.
for (attempt in 1:attempts) {
# Reset learning rate `tau`.
tau <- tau.init
# Sample a `(p, q)` dimensional matrix from the stiefel manifold as
# optimization start value.
V <- rStiefl(p, q)
# Initial loss and gradient.
loss <- Inf
G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
# Set last loss (aka, loss after applying the step).
loss.last <- loss
# Cayley transform matrix `A`
A <- (G %*% t(V)) - (V %*% t(G))
A.last <- A
W <- -A
Z <- W %*% V
# Compute directional derivative.
loss.prime <- sum(G * Z) # Tr(G^T Z)
# Call logger with initial values before starting optimization.
if (is.function(logger)) {
epoch <- 0 # Set epoch count to 0 (only relevant for logging).
error <- NA
logger(environment())
}
## Start optimization loop.
for (epoch in 1:epochs) {
# New directional derivative.
loss.prime <- sum(G * Z)
# Reset `tau` for step-size selection.
tau <- tau.init
for (iter in 1:max.linesearch.iter) {
V.tau <- retractStiefl(V + tau * Z)
# Loss at position after a step.
loss <- grad(X, Y, V.tau, h,
loss.only = TRUE, persistent = TRUE)
# Check Armijo condition.
if (loss <= loss.last + (rho * tau * loss.prime)) {
break() # Iff fulfilled stop linesearch.
}
# Reduce step-size and continue linesearch.
tau <- tau / 2
}
# Compute error.
error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
# Perform step with found step-size
V <- V.tau
loss.last <- loss
# Call logger.
if (is.function(logger)) {
logger(environment())
}
# Check break condition.
# Note: the division by `sqrt(2 * q)` is included in `tol`.
if (error < tol) {
break()
}
# Compute Gradient at new position.
G <- grad(X, Y, V, h, persistent = TRUE)
# Store last `A` for `beta` computation.
A.last <- A
# Cayley transform matrix `A`
A <- (G %*% t(V)) - (V %*% t(G))
# Check 2. break condition.
if (norm(A, type = 'F') < tol) {
break()
}
# New directional derivative.
loss.prime <- sum(G * Z)
# Reset beta if needed.
if (loss.prime < 0) {
# Compute `beta` as described in paper.
beta.FR <- (norm(A, type = 'F') / norm(A.last, type = 'F'))^2
beta.PR <- sum(A * (A - A.last)) / norm(A.last, type = 'F')^2
if (beta.PR < -beta.FR) {
beta <- -beta.FR
} else if (abs(beta.PR) < beta.FR) {
beta <- beta.PR
} else if (beta.PR > beta.FR) {
beta <- beta.FR
} else {
beta <- 0
}
} else {
beta <- 0
}
# Update direction.
W <- -A + beta * W
Z <- W %*% V
}
# Check if current attempt improved previous ones
if (loss < loss.best) {
loss.best <- loss
V.best <- V
}
}
return(list(
loss = loss.best,
V = V.best,
B = null(V.best),
h = h
))
}

View File

@ -1,121 +0,0 @@
#' Implementation of the CVE method using an RMSprop-style adaptive gradient on the Stiefel manifold.
#'
#' @references A Riemannian Conjugate Gradient Algorithm with Implicit Vector
#' Transport for Optimization on the Stiefel Manifold
#' @keywords internal
#' @export
cve_rmsprob <- function(X, Y, k,
nObs = sqrt(nrow(X)),
h = NULL,
tau = 0.1,
tol = 1e-4,
rho = 0.1, # Momentum update.
slack = 0,
epochs = 50L,
attempts = 10L,
epsilon = 1e-7,
max.linesearch.iter = 20L,
logger = NULL
) {
# Set the environment of `grad` so that it can find, and manipulate,
# this function's local variables.
environment(grad) <- environment()
# Get dimensions.
n <- nrow(X) # Number of samples.
p <- ncol(X) # Data dimensions
q <- p - k # Complement dimension of the SDR space.
# Save initial learning rate `tau`.
tau.init <- tau
# Adapt tolerance for the break condition.
tol <- sqrt(2 * q) * tol
# Estimate bandwidth if not given.
if (missing(h) || !is.numeric(h)) {
h <- estimate.bandwidth(X, k, nObs)
}
# Compute persistent data.
# Compute lookup indices for symmetry, lower/upper
# triangular parts and vectorization.
pair.index <- elem.pairs(seq(n))
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
# Index of vectorized matrix, for lower and upper triangular part.
lower <- ((i - 1) * n) + j
upper <- ((j - 1) * n) + i
# Create all pairwise differences of rows of `X`.
X_diff <- X[i, , drop = F] - X[j, , drop = F]
# Identity matrix.
I_p <- diag(1, p)
# Init tracking of current best (according multiple attempts).
V.best <- NULL
loss.best <- Inf
# Start loop for multiple attempts.
for (attempt in 1:attempts) {
# Sample a `(p, q)` dimensional matrix from the stiefel manifold as
# optimization start value.
V <- rStiefl(p, q)
# Call logger with initial values before starting optimization.
if (is.function(logger)) {
loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
epoch <- 0 # Set epoch count to 0 (only relevant for logging).
error <- NA
logger(environment())
}
M <- matrix(0, p, q)
## Start optimization loop.
for (epoch in 1:epochs) {
# Compute gradient and loss at current position.
loss <- Inf
G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
# Projected gradient.
A <- projTangentStiefl(V, G)
# Projected element-wise squared gradient.
Asq <- projTangentStiefl(V, G * G)
# Momentum update.
M <- (1 - rho) * Asq + rho * projTangentStiefl(V, M)
# Parallel transport (on Stiefel manifold) into direction of `G`.
V.tau <- retractStiefl(V - tau.init * A / (sqrt(abs(M)) + epsilon))
# Compute error.
error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
# Perform step.
V <- V.tau
# Call logger after taking a step.
if (is.function(logger)) {
# Set tau to a step-size estimate (only for logging)
tau <- tau.init / mean(sqrt(abs(M)) + epsilon)
logger(environment())
}
# Check break condition.
# Note: the division by `sqrt(2 * q)` is included in `tol`.
if (error < tol) {
break()
}
}
# Check if current attempt improved previous ones
if (loss < loss.best) {
loss.best <- loss
V.best <- V
}
}
return(list(
loss = loss.best,
V = V.best,
B = null(V.best),
h = h
))
}

View File

@ -1,129 +0,0 @@
#' Stochastic gradient descent implementation of the CVE method, taking
#' gradient steps on random batches of the data.
#'
#' @keywords internal
#' @export
cve_sgd <- function(X, Y, k,
nObs = sqrt(nrow(X)),
h = NULL,
tau = 0.01,
tol = 1e-3,
epochs = 50L,
batch.size = 16L,
attempts = 10L,
logger = NULL
) {
# Set the environment of `grad` so that it can find, and manipulate,
# this function's local variables.
environment(grad) <- environment()
# Get dimensions.
n <- nrow(X) # Number of samples.
p <- ncol(X) # Data dimensions
q <- p - k # Complement dimension of the SDR space.
# Save initial learning rate `tau`.
tau.init <- tau
# Adapt tolerance for the break condition.
tol <- sqrt(2 * q) * tol
# Estimate bandwidth if not given.
if (missing(h) || !is.numeric(h)) {
h <- estimate.bandwidth(X, k, nObs)
}
# Compute persistent data.
# Compute lookup indices for symmetry, lower/upper
# triangular parts and vectorization.
pair.index <- elem.pairs(seq(n))
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
# Index of vectorized matrix, for lower and upper triangular part.
lower <- ((i - 1) * n) + j
upper <- ((j - 1) * n) + i
# Create all pairwise differences of rows of `X`.
X_diff <- X[i, , drop = F] - X[j, , drop = F]
# Identity matrix.
I_p <- diag(1, p)
# Init a list of data indices (shuffled for batching).
indices <- seq(n)
# Init tracking of current best (according multiple attempts).
V.best <- NULL
loss.best <- Inf
# Start loop for multiple attempts.
for (attempt in 1:attempts) {
# Reset learning rate `tau`.
tau <- tau.init
# Sample a `(p, q)` dimensional matrix from the stiefel manifold as
# optimization start value.
V <- rStiefl(p, q)
# Keep track of last `V` for computing error after an epoch.
V.last <- V
if (is.function(logger)) {
loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
error <- NA
epoch <- 0
logger(environment())
}
# Repeat `epochs` times
for (epoch in 1:epochs) {
# Shuffle batches
batch.shuffle <- sample(indices)
# Make a step for each batch.
for (batch.start in seq(1, n, batch.size)) {
# Select batch data indices.
batch.end <- min(batch.start + batch.size - 1, length(batch.shuffle))
batch <- batch.shuffle[batch.start:batch.end]
# Compute batch gradient.
loss <- NULL
G <- grad(X[batch, ], Y[batch], V, h, loss.out = TRUE)
# Cayley transform matrix.
A <- (G %*% t(V)) - (V %*% t(G))
# Apply learning rate `tau`.
A.tau <- tau * A
# Parallel transport (on Stiefel manifold) into direction of `G`.
V <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)
}
# And the error for the history.
error <- norm(V.last %*% t(V.last) - V %*% t(V), type = "F")
V.last <- V
if (is.function(logger)) {
# Compute loss at end of epoch for logging.
loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
logger(environment())
}
# Check break condition.
if (error < tol) {
break()
}
}
# Compute actual loss after finishing for comparing multiple attempts.
loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
# After each attempt, check if last attempt reached a better result.
if (loss < loss.best) {
loss.best <- loss
V.best <- V
}
}
return(list(
loss = loss.best,
V = V.best,
B = null(V.best),
h = h
))
}
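How the inner loop above partitions the shuffled indices, in isolation (n = 10 and batch.size = 4 purely for illustration):

n <- 10; batch.size <- 4
batch.shuffle <- sample(n)
lapply(seq(1, n, batch.size), function(batch.start) {
    batch.end <- min(batch.start + batch.size - 1, n)
    batch.shuffle[batch.start:batch.end]
})
# three batches of sizes 4, 4 and 2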

View File

@ -1,133 +0,0 @@
#' Stochastic gradient descent implementation of the CVE method with
#' RMSprop-style adaptive step sizes.
#'
#' @keywords internal
#' @export
cve_sgdrmsprob <- function(X, Y, k,
nObs = sqrt(nrow(X)),
h = NULL,
tau = 0.1,
tol = 1e-4,
rho = 0.1,
epochs = 50L,
batch.size = 16L,
attempts = 10L,
epsilon = 1e-7,
logger = NULL
) {
# Set the environment of `grad` so that it can find, and manipulate,
# this function's local variables.
environment(grad) <- environment()
# Get dimensions.
n <- nrow(X) # Number of samples.
p <- ncol(X) # Data dimensions
q <- p - k # Complement dimension of the SDR space.
# Save initial learning rate `tau`.
tau.init <- tau
# Adapt tolerance for the break condition.
tol <- sqrt(2 * q) * tol
# Estimate bandwidth if not given.
if (missing(h) || !is.numeric(h)) {
h <- estimate.bandwidth(X, k, nObs)
}
# Compute persistent data.
# Compute lookup indices for symmetry, lower/upper
# triangular parts and vectorization.
pair.index <- elem.pairs(seq(n))
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
# Index of vectorized matrix, for lower and upper triangular part.
lower <- ((i - 1) * n) + j
upper <- ((j - 1) * n) + i
# Create all pairwise differences of rows of `X`.
X_diff <- X[i, , drop = F] - X[j, , drop = F]
# Identity matrix.
I_p <- diag(1, p)
# Init a list of data indices (shuffled for batching).
indices <- seq(n)
# Init tracking of current best (according multiple attempts).
V.best <- NULL
loss.best <- Inf
# Start loop for multiple attempts.
for (attempt in 1:attempts) {
# Reset learning rate `tau`.
tau <- tau.init
# Sample a `(p, q)` dimensional matrix from the stiefel manifold as
# optimization start value.
V <- rStiefl(p, q)
# Keep track of last `V` for computing error after an epoch.
V.last <- V
if (is.function(logger)) {
loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
error <- NA
epoch <- 0
logger(environment())
}
M <- matrix(0, p, q)
# Repeat `epochs` times
for (epoch in 1:epochs) {
# Shuffle batches
batch.shuffle <- sample(indices)
# Make a step for each batch.
for (batch.start in seq(1, n, batch.size)) {
# Select batch data indices.
batch.end <- min(batch.start + batch.size - 1, length(batch.shuffle))
batch <- batch.shuffle[batch.start:batch.end]
# Compute batch gradient.
loss <- NULL
G <- grad(X[batch, ], Y[batch], V, h, loss.out = TRUE)
# Projected gradient.
A <- projTangentStiefl(V, G)
# Projected element-wise squared gradient.
Asq <- projTangentStiefl(V, G * G)
# Momentum update.
M <- (1 - rho) * Asq + rho * projTangentStiefl(V, M)
# Parallel transport (on Stiefel manifold) into direction of `G`.
V <- retractStiefl(V - tau.init * A / (sqrt(abs(M)) + epsilon))
}
# And the error for the history.
error <- norm(V.last %*% t(V.last) - V %*% t(V), type = "F")
V.last <- V
if (is.function(logger)) {
# Compute loss at end of epoch for logging.
loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
logger(environment())
}
# Check break condition.
if (error < tol) {
break()
}
}
# Compute actual loss after finishing for comparing multiple attempts.
loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
# After each attempt, check if last attempt reached a better result.
if (loss < loss.best) {
loss.best <- loss
V.best <- V
}
}
return(list(
loss = loss.best,
V = V.best,
B = null(V.best),
h = h
))
}

View File

@ -1,139 +0,0 @@
#' Simple implementation of the CVE method. 'Simple' means that this method is
#' a classic GD method using no further tricks.
#'
#' @keywords internal
#' @export
cve_simple <- function(X, Y, k,
nObs = sqrt(nrow(X)),
h = NULL,
tau = 1.0,
tol = 1e-3,
slack = 0,
epochs = 50L,
attempts = 10L,
logger = NULL
) {
# Set the environment of `grad` so that it can find, and manipulate,
# this function's local variables.
environment(grad) <- environment()
# Get dimensions.
n <- nrow(X) # Number of samples.
p <- ncol(X) # Data dimensions
q <- p - k # Complement dimension of the SDR space.
# Save initial learning rate `tau`.
tau.init <- tau
# Adapt tolerance for the break condition.
tol <- sqrt(2 * q) * tol
# Estimate bandwidth if not given.
if (missing(h) || !is.numeric(h)) {
h <- estimate.bandwidth(X, k, nObs)
}
# Compute persistent data.
# Compute lookup indices for symmetry, lower/upper
# triangular parts and vectorization.
pair.index <- elem.pairs(seq(n))
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
# Index of vectorized matrix, for lower and upper triangular part.
lower <- ((i - 1) * n) + j
upper <- ((j - 1) * n) + i
# Create all pairwise differences of rows of `X`.
X_diff <- X[i, , drop = F] - X[j, , drop = F]
# Identity matrix.
I_p <- diag(1, p)
# Init tracking of current best (according multiple attempts).
V.best <- NULL
loss.best <- Inf
# Start loop for multiple attempts.
for (attempt in 1:attempts) {
# Reset learning rate `tau`.
tau <- tau.init
# Sample a `(p, q)` dimensional matrix from the stiefel manifold as
# optimization start value.
V <- rStiefl(p, q)
# Initial loss and gradient.
loss <- Inf
G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
# Set last loss (aka, loss after applying the step).
loss.last <- loss
# Cayley transform matrix `A`
A <- (G %*% t(V)) - (V %*% t(G))
# Call logger with initial values before starting optimization.
if (is.function(logger)) {
logger(0L, attempt, loss, V, tau)
}
## Start optimization loop.
for (epoch in 1:epochs) {
# Apply learning rate `tau`.
A.tau <- tau * A
# Parallel transport (on Stiefel manifold) into direction of `G`.
V.tau <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)
# Loss at position after a step.
loss <- grad(X, Y, V.tau, h, loss.only = TRUE, persistent = TRUE)
# Check if step is appropriate, iff not reduce learning rate.
if ((loss - loss.last) > slack * loss.last) {
tau <- tau / 2
next() # Keep position and try with smaller `tau`.
}
# Compute error.
error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
# Check break condition (epoch check to skip ignored gradient calc).
# Note: the division by `sqrt(2 * q)` is included in `tol`.
if (error < tol || epoch >= epochs) {
# take last step and stop optimization.
V <- V.tau
# Call the logger one last time before stopping.
if (is.function(logger)) {
logger(epoch, attempt, loss, V, tau)
}
break()
}
# Perform the step and remember previous loss.
V <- V.tau
loss.last <- loss
# Call logger after taking a step.
if (is.function(logger)) {
logger(epoch, attempt, loss, V, tau)
}
# Compute gradient at new position.
G <- grad(X, Y, V, h, persistent = TRUE)
# Cayley transform matrix `A`
A <- (G %*% t(V)) - (V %*% t(G))
}
# Check if current attempt improved previous ones
if (loss < loss.best) {
loss.best <- loss
V.best <- V
}
}
return(list(
loss = loss.best,
V = V.best,
B = null(V.best),
h = h
))
}

View File

@ -1,109 +0,0 @@
#' Generates test datasets.
#'
#' Provides sample datasets. There are 5 different datasets named
#' M1, M2, M3, M4 and M5 described in the paper references below.
#' The general model is given by:
#' \deqn{Y = g(B'X) + \epsilon}
#'
#' @param name One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"} or \code{"M5"}
#' @param n number of samples
#' @param p Dim. of random variable \code{X}.
#' @param p.mix Only for \code{"M4"}, see: below.
#' @param lambda Only for \code{"M4"}, see: below.
#'
#' @return List with elements
#' \describe{
#' \item{X}{data}
#' \item{Y}{response}
#' \item{B}{Used dim-reduction matrix}
#' \item{name}{Name of the dataset (name parameter)}
#' }
#'
#' @section M1:
#' The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace
#' dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points.
#' The link function \eqn{g} is given as
#' \deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + 0.5\epsilon}{g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + 0.5 epsilon}
#' @section M2:
#' \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a default of \eqn{n = 200} data points.
#' The link function \eqn{g} is given as
#' \deqn{g(x) = x_1 x_2^2 + 0.5\epsilon}{g(x) = x_1 x_2^2 + 0.5 epsilon}
#' @section M3:
#' \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 1} and a
#' default of \eqn{n = 100} data points. The link function \eqn{g} is given as
#' \deqn{g(x) = cos(x_1) + 0.5\epsilon}{g(x) = cos(x_1) + 0.5 epsilon}
#' @section M4:
#' \eqn{X} is a two-component standard normal mixture, shifted by
#' \eqn{\pm\lambda} with mixing probability \code{p.mix}, \eqn{k = 1} and a
#' default of \eqn{n = 100} data points; the link is the same as for M3.
#' @section M5:
#' \eqn{X\sim N_p(0, I_p)}{X ~ N_p(0, I_p)} with \eqn{k = 1} and a default of
#' \eqn{n = 42} data points. The link function \eqn{g} is given as
#' \deqn{g(x) = 2 log(|x_1| + 1) + 0.5\epsilon}{g(x) = 2 log(|x_1| + 1) + 0.5 epsilon}
#'
#' @import stats
#' @importFrom stats rnorm rbinom
#' @export
dataset <- function(name = "M1", n, B, p.mix = 0.3, lambda = 1.0) {
# validate parameters
stopifnot(name %in% c("M1", "M2", "M3", "M4", "M5"))
# set default values if not supplied
if (missing(n)) {
n <- if (name %in% c("M1", "M2")) 200 else if (name != "M5") 100 else 42
}
if (missing(B)) {
p <- 12
if (name == "M1") {
B <- cbind(
c( 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0),
c( 1,-1, 1,-1, 1,-1, 0, 0, 0, 0, 0, 0)
) / sqrt(6)
} else if (name == "M2") {
B <- cbind(
c(c(1, 0), rep(0, 10)),
c(c(0, 1), rep(0, 10))
)
} else {
B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, 6)), 12, 1)
}
} else {
p <- dim(B)[1]
# validate col. nr to match dataset `k = dim(B)[2]`
stopifnot(
(name %in% c("M1", "M2") && dim(B)[2] == 2) ||
(name %in% c("M3", "M4", "M5") && dim(B)[2] == 1)
)
}
# set link function `g` for model `Y ~ g(B'X) + epsilon`
if (name == "M1") {
g <- function(BX) { BX[1] / (0.5 + (BX[2] + 1.5)^2) }
} else if (name == "M2") {
g <- function(BX) { BX[1] * BX[2]^2 }
} else if (name %in% c("M3", "M4")) {
g <- function(BX) { cos(BX[1]) }
} else { # name == "M5"
g <- function(BX) { 2 * log(abs(BX[1]) + 1) }
}
# compute X
if (name != "M4") {
# compute the root of the covariance matrix according to the dataset
if (name %in% c("M1", "M3")) {
# Variance-Covariance structure for `X ~ N_p(0, \Sigma)` with
# `\Sigma_{i, j} = 0.5^{|i - j|}`.
Sigma <- matrix(0.5^abs(kronecker(1:p, 1:p, '-')), p, p)
# decompose Sigma to Sigma.root^T Sigma.root = Sigma for usage in creation of `X`
Sigma.root <- chol(Sigma)
} else { # name %in% c("M2", "M5")
Sigma.root <- diag(rep(1, p)) # d-dim identity
}
# data `X` as multivariate random normal variable with
# variance matrix `Sigma`.
X <- replicate(p, rnorm(n, 0, 1)) %*% Sigma.root
} else { # name == "M4"
X <- t(replicate(n, rep((1 - 2 * rbinom(1, 1, p.mix)) * lambda, p) + rnorm(p, 0, 1)))
}
# response `Y ~ g(B'X) + epsilon` with `epsilon ~ N(0, 0.25)` (sd 0.5)
Y <- apply(X, 1, function(X_i) {
g(t(B) %*% X_i) + rnorm(1, 0, 0.5)
})
return(list(X = X, Y = Y, B = B, name = name))
}

View File

@ -1,27 +0,0 @@
#' Estimated bandwidth for CVE.
#'
#' Estimates a proper bandwidth \code{h} according to
#' \deqn{%
#' h = \chi_{k}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{%
#' h = qchisq( (nObs - 1)/(n - 1), k ) 2 tr(Sigma) / p}
#'
#' @param X data matrix of dimension (n x p) with n data points X_i of
#' dimension p; each row represents one data point.
#' @param k Guess for rank(B).
#' @param nObs Either a numeric or a function. If specified as a numeric value
#' it is used directly in the computation of the bandwidth. If it is a
#' function, `nObs` is evaluated as \code{nObs(nrow(x))}. The default if not
#' supplied at all is \code{nObs <- nrow(x)^0.5}.
#'
#' @seealso [\code{\link{qchisq}}]
#' @export
estimate.bandwidth <- function(X, k, nObs) {
n <- nrow(X)
p <- ncol(X)
X_centered <- scale(X, center=TRUE, scale=FALSE)
Sigma <- (1 / n) * t(X_centered) %*% X_centered
quantil <- qchisq((nObs - 1) / (n - 1), k)
return(2 * quantil * sum(diag(Sigma)) / p)
}
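A worked evaluation of the formula above, using the default nObs = sqrt(n) convention of the callers:

set.seed(1)
X <- matrix(rnorm(100 * 5), 100, 5)
estimate.bandwidth(X, k = 2, nObs = sqrt(100))
# the same value, spelled out from the definition:
Xc <- scale(X, center = TRUE, scale = FALSE)
2 * qchisq((sqrt(100) - 1) / (100 - 1), 2) * sum(diag(crossprod(Xc) / 100)) / 5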

View File

@ -1,82 +0,0 @@
#' Compute the gradient of `L(V)` given a dataset `X`.
#'
#' @param X Data matrix.
#' @param Y Response.
#' @param V Position to compute the gradient at, i.e. a point on the Stiefel manifold.
#' @param h Bandwidth
#' @param loss.out Iff \code{TRUE} loss will be written to parent environment.
#' @param loss.only Boolean to only compute the loss; if \code{TRUE} a single
#' loss value is returned and \code{envir} is ignored.
#' @param persistent Determines if data indices and dependent calculations shall
#' be reused from the parent environment. ATTENTION: Do NOT set this flag, only
#' intended for internal usage by carefully aligned functions!
#' @keywords internal
#' @export
grad <- function(X, Y, V, h,
loss.out = FALSE,
loss.only = FALSE,
persistent = FALSE) {
# Get number of samples and dimension.
n <- nrow(X)
p <- ncol(X)
if (!persistent) {
# Compute lookup indices for symmetry, lower/upper
# triangular parts and vectorization.
pair.index <- elem.pairs(seq(n))
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
# Index of vectorized matrix, for lower and upper triangular part.
lower <- ((i - 1) * n) + j
upper <- ((j - 1) * n) + i
# Create all pairwise differences of rows of `X`.
X_diff <- X[i, , drop = F] - X[j, , drop = F]
}
# Projection matrix onto `span(V)`
Q <- diag(1, p) - tcrossprod(V, V)
# Vectorized distance matrix `D`.
vecD <- colSums(tcrossprod(Q, X_diff)^2)
# Create Kernel matrix (aka. apply kernel to distances)
K <- matrix(1, n, n) # `exp(0) == 1`
K[lower] <- exp((-0.5 / h) * vecD^2) # Set lower tri. part
K[upper] <- t(K)[upper] # Mirror lower tri. to upper
# Weighted `Y` momentums
colSumsK <- colSums(K)
y1 <- (K %*% Y) / colSumsK
y2 <- (K %*% Y^2) / colSumsK
# Per example loss `L(V, X_i)`
L <- y2 - y1^2
if (loss.only) {
return(mean(L))
}
if (loss.out) {
loss <<- mean(L)
}
# Compute scaling vector `vecS` for `X_diff`.
tmp <- kronecker(matrix(y1, n, 1), matrix(Y, 1, n), `-`)^2
tmp <- as.vector(L) - tmp
tmp <- tmp * K / colSumsK
vecS <- (tmp + t(tmp))[lower] * vecD
# The gradient.
# 1. The `crossprod(A, B)` is equivalent to `t(A) %*% B`,
# 2. `(X_diff %*% V) * vecS` is first a matrix-matrix mult. and then using
# recycling to scale each row with the values of `vecS`.
# Note that `vecS` is a vector and that `R` uses column-major ordering
# of matrices.
# (See: notes for more details)
# TODO: Depending on n, p, q decide which version to take (for current
# datasets "inner" is faster, see: notes).
# inner = crossprod(X_diff, X_diff * vecS) %*% V,
# outer = crossprod(X_diff, (X_diff %*% V) * vecS)
G <- crossprod(X_diff, X_diff * vecS) %*% V
G <- (-2 / (n * h^2)) * G
return(G)
}
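A finite-difference spot check of the returned `G` (treating `V` as a free matrix, i.e. ignoring the manifold constraint; agreement is approximate and assumes the helpers of this package are loaded):

set.seed(1)
n <- 30; p <- 4; q <- 2
X <- matrix(rnorm(n * p), n)
Y <- rnorm(n)
V <- qr.Q(qr(matrix(rnorm(p * q), p, q)))
G <- grad(X, Y, V, h = 1)
dV <- matrix(rnorm(p * q), p, q)
eps <- 1e-6
fd <- (grad(X, Y, V + eps * dV, h = 1, loss.only = TRUE) -
       grad(X, Y, V - eps * dV, h = 1, loss.only = TRUE)) / (2 * eps)
c(directional = sum(G * dV), finite.diff = fd)  # should roughly agree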

View File

@ -1,43 +0,0 @@
#' Performs a grid search for parameters over a parameter grid.
#' @examples
#' args <- list(
#' h = c(0.05, 0.1, 0.2),
#' method = c("simple", "sgd"),
#' tau = c(0.5, 0.1, 0.01)
#' )
#' cve.grid.search(X, Y, k, args)
#' @export
cve.grid.search <- function(X, Y, k, args) {
args$stringsAsFactors = FALSE
args$KEEP.OUT.ATTRS = FALSE
grid <- do.call(expand.grid, args)
grid.length <- length(grid[[1]])
print(grid)
for (i in 1:grid.length) {
arguments <- as.list(grid[i, ])
# Set required arguments
arguments$X <- X
arguments$Y <- Y
arguments$k <- k
# print(arguments)
dr <- do.call(cve.call, arguments)
print(dr$loss)
}
}
# ds <- dataset()
# X <- ds$X
# Y <- ds$Y
# (k <- ncol(ds$B))
# args <- list(
# h = c(0.05, 0.1, 0.2),
# method = c("simple", "sgd"),
# tau = c(0.5, 0.1, 0.01),
# attempts = c(1L)
# )
# cve.grid.search(X, Y, k, args)

View File

@ -1,82 +0,0 @@
#' Samples uniformly from the Stiefel manifold.
#'
#' @param p row dim.
#' @param q col dim.
#' @return `(p, q)` semi-orthogonal matrix
#' @examples
#' V <- rStiefl(6, 4)
#' @export
rStiefl <- function(p, q) {
return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))))
}
#' Retraction to the manifold.
#'
#' @param A matrix.
#' @return `(p, q)` semi-orthogonal matrix, aka element of the Stiefl manifold.
#' @keywords internal
#' @export
retractStiefl <- function(A) {
return(qr.Q(qr(A)))
}
#' Skew-Symmetric matrix computed from `A` as
#' \eqn{1/2 (A - A^T)}.
#' @param A Matrix of dim `(p, q)`
#' @return Skew-Symmetric matrix of dim `(p, p)`.
#' @keywords internal
#' @export
skew <- function(A) {
0.5 * (A - t(A))
}
#' Symmetric matrix computed from `A` as
#' \eqn{1/2 (A + A^T)}.
#' @param A Matrix of dim `(p, q)`
#' @return Symmetric matrix of dim `(p, p)`.
#' @keywords internal
#' @export
sym <- function(A) {
0.5 * (A + t(A))
}
#' Orthogonal projection onto the tangent space of the Stiefel manifold.
#'
#' @param V Point on the Stiefel manifold.
#' @param G matrix to be projected onto the tangent space at `V`.
#' @return `(p, q)` matrix as element of the tangent space at `V`.
#' @keywords internal
#' @export
projTangentStiefl <- function(V, G) {
Q <- diag(1, nrow(V)) - V %*% t(V)
return(Q %*% G + V %*% skew(t(V) %*% G))
}
#' Null space basis of given matrix `V`
#'
#' @param V `(p, q)` matrix
#' @return Semi-orthogonal `(p, p - q)` matrix spanning the null space of `V`.
#' @keywords internal
#' @export
null <- function(V) {
tmp <- qr(V)
set <- if(tmp$rank == 0L) seq_len(ncol(V)) else -seq_len(tmp$rank)
return(qr.Q(tmp, complete=TRUE)[, set, drop=FALSE])
}
#' Creates a (numeric) matrix where each column contains
#' an element to element matching.
#' @param elements numeric vector of elements to match
#' @return matrix of size `(2, n * (n - 1) / 2)` for an argument of length `n`.
#' @keywords internal
#' @examples
#' elem.pairs(seq.int(2, 5))
#' @export
elem.pairs <- function(elements) {
# Number of elements to match.
n <- length(elements)
# Create all combinations.
pairs <- rbind(rep(elements, each=n), rep(elements, n))
# Select unique combinations without self interaction.
return(pairs[, pairs[1, ] < pairs[2, ]])
}
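
# A minimal self-check sketch (not from the original file) for the helpers
# above, assuming they are loaded in the current session.
p <- 6; q <- 4
A <- matrix(rnorm(p * p), p, p)
stopifnot(all.equal(skew(A) + sym(A), A))       # A = skew(A) + sym(A)
V <- rStiefel(p, q)
W <- null(V)                                    # basis of the null space
stopifnot(all.equal(crossprod(cbind(V, W)), diag(1, p)))
G <- projTangentStiefel(V, matrix(rnorm(p * q), p, q))
stopifnot(all.equal(t(V) %*% G, -t(G) %*% V))   # t(V) G is skew-symmetric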

View File

@ -1,2 +0,0 @@
runtime_test Runtime comparison of CVE against MAVE for M1 - M5 datasets.
logging Example of a logger function for cve algorithm analysis.

View File

@ -1,43 +0,0 @@
library(CVEpureR)
# Setup histories.
(epochs <- 50)
(attempts <- 10)
loss.history <- matrix(NA, epochs + 1, attempts)
error.history <- matrix(NA, epochs + 1, attempts)
tau.history <- matrix(NA, epochs + 1, attempts)
true.error.history <- matrix(NA, epochs + 1, attempts)
# Create a dataset
ds <- dataset("M1")
X <- ds$X
Y <- ds$Y
B <- ds$B # the true `B`
(k <- ncol(ds$B))
# True projection matrix.
P <- B %*% solve(t(B) %*% B) %*% t(B)
# Define the logger for the `cve()` method.
logger <- function(env) {
    # Note the `<<-` assignment!
loss.history[env$epoch + 1, env$attempt] <<- env$loss
error.history[env$epoch + 1, env$attempt] <<- env$error
tau.history[env$epoch + 1, env$attempt] <<- env$tau
# Compute true error by comparing to the true `B`
B.est <- null(env$V) # Function provided by CVE
P.est <- B.est %*% solve(t(B.est) %*% B.est) %*% t(B.est)
true.error <- norm(P - P.est, 'F') / sqrt(2 * k)
true.error.history[env$epoch + 1, env$attempt] <<- true.error
}
# Perform SDR for ONE `k`.
dr <- cve(Y ~ X, k = k, logger = logger, epochs = epochs, attempts = attempts)
# Plot histories
par(mfrow = c(2, 2))
matplot(loss.history, type = 'l', log = 'y', xlab = 'iter',
main = 'loss', ylab = expression(L(V[iter])))
matplot(error.history, type = 'l', log = 'y', xlab = 'iter',
main = 'error', ylab = 'error')
matplot(tau.history, type = 'l', log = 'y', xlab = 'iter',
main = 'tau', ylab = 'tau')
matplot(true.error.history, type = 'l', log = 'y', xlab = 'iter',
main = 'true error', ylab = 'true error')

View File

@ -1,89 +0,0 @@
# Usage:
# ~$ Rscript runtime_test.R
library(CVEpureR) # load CVE
#' Writes progress to console.
tell.user <- function(name, start.time, i, length) {
cat("\rRunning Test (", name, "):",
i, "/", length,
" - elapsed:", format(Sys.time() - start.time), "\033[K")
}
subspace.dist <- function(B1, B2){
P1 <- B1 %*% solve(t(B1) %*% B1) %*% t(B1)
P2 <- B2 %*% solve(t(B2) %*% B2) %*% t(B2)
return(norm(P1 - P2, type = 'F'))
}
# Number of simulations
SIM.NR <- 50
# maximal number of iterations in curvilinear search algorithm
MAXIT <- 50
# number of arbitrary starting values for curvilinear optimization
ATTEMPTS <- 10
# set names of datasets
dataset.names <- c("M1", "M2", "M3", "M4", "M5")
# Set used CVE method
methods <- c("simple") # c("legacy", "simple", "sgd", "linesearch")
# Setup error and time tracking variables
error <- matrix(NA, SIM.NR, length(methods) * length(dataset.names))
time <- matrix(NA, SIM.NR, ncol(error))
colnames(error) <- kronecker(paste0(dataset.names, '-'), methods, paste0)
colnames(time) <- colnames(error)
# only for telling user (to stdout)
count <- 0
start.time <- Sys.time()
# Start simulation loop.
for (sim in 1:SIM.NR) {
# Repeat for each dataset.
for (name in dataset.names) {
count <- count + 1
tell.user(name, start.time, count, SIM.NR * length(dataset.names))
# Create a new dataset
ds <- dataset(name)
# Prepare X, Y and combine to data matrix
Y <- ds$Y
X <- ds$X
data <- cbind(Y, X)
# get dimensions
dim <- ncol(X)
truedim <- ncol(ds$B)
for (method in methods) {
dr.time <- system.time(
dr <- cve.call(X, Y,
method = method,
k = truedim,
attempts = ATTEMPTS
)
)
dr <- dr[[truedim]]
key <- paste0(name, '-', method)
error[sim, key] <- subspace.dist(dr$B, ds$B) / sqrt(2 * truedim)
time[sim, key] <- dr.time["elapsed"]
}
}
}
cat("\n\n## Time [sec] Means:\n")
print(colMeans(time))
cat("\n## Error Means:\n")
print(colMeans(error))
at <- seq(ncol(error)) + rep(seq(ncol(error) / length(methods)) - 1, each = length(methods))
boxplot(error,
main = paste0("Error (Nr of simulations ", SIM.NR, ")"),
ylab = "Error",
las = 2,
at = at
)
boxplot(time,
main = paste0("Time (Nr of simulations ", SIM.NR, ")"),
ylab = "Time [sec]",
las = 2,
at = at
)

Binary file not shown.

View File

@ -1,20 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/CVE.R
\docType{package}
\name{CVEpureR-package}
\alias{CVEpureR}
\alias{CVEpureR-package}
\title{Conditional Variance Estimator (CVE)}
\description{
Conditional Variance Estimator for Sufficient Dimension
Reduction
}
\details{
TODO: And some details
}
\references{
Fertl Lukas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
}
\author{
Loki
}

View File

@ -1,71 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/CVE.R
\name{cve}
\alias{cve}
\alias{cve.call}
\title{Implementation of the CVE method.}
\usage{
cve(formula, data, method = "simple", max.dim = 10, ...)
cve.call(X, Y, method = "simple", nObs = nrow(X)^0.5, min.dim = 1,
max.dim = 10, k, ...)
}
\arguments{
\item{formula}{Formula for the regression model defining `X`, `Y`.
See: \code{\link{formula}}.}
\item{data}{data.frame holding data for formula.}
\item{method}{The methods differ only in the optimization used.
All of them are Gradient based optimization on a Stiefel manifold.
\itemize{
\item "simple" Simple reduction of stepsize.
\item "sgd" stocastic gradient decent.
\item TODO: further
}}
\item{...}{Further parameters depending on the used method.}
\item{X}{Data}
\item{Y}{Responses}
\item{nObs}{bandwidth tuning parameter as described in the paper.}
\item{k}{guess for the SDR dimension.}
\item{...}{Method specific parameters.}
}
\description{
Conditional Variance Estimator (CVE) is a novel sufficient dimension
reduction (SDR) method assuming a model
\deqn{Y \sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon}
where B'X is a lower dimensional projection of the predictors.
}
\examples{
library(CVE)
# sample dataset
ds <- dataset("M5")
# call `cve` with default method (aka "simple")
dr.simple <- cve(ds$Y ~ ds$X, k = ncol(ds$B))
# plot optimization history (loss via iteration)
plot(dr.simple, main = "CVE M5 simple")
# call `cve` with method "linesearch" using `data.frame` as data.
data <- data.frame(Y = ds$Y, X = ds$X)
# Note: `Y, X` are NOT defined, they are extracted from `data`.
dr.linesearch <- cve(Y ~ ., data, method = "linesearch", k = ncol(ds$B))
plot(dr.linesearch, main = "CVE M5 linesearch")
}
\references{
Fertl L., Bura E. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
}
\seealso{
\code{\link{formula}}. For a complete parameters list (dependent on
the method) see \code{\link{cve_simple}}, \code{\link{cve_sgd}}
}

View File

@ -1,19 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/gridSearch.R
\name{cve.grid.search}
\alias{cve.grid.search}
\title{Performs a grid search for parameters over a parameter grid.}
\usage{
cve.grid.search(X, Y, k, args)
}
\description{
Performs a grid search for parameters over a parameter grid.
}
\examples{
args <- list(
h = c(0.05, 0.1, 0.2),
method = c("simple", "sgd"),
tau = c(0.5, 0.1, 0.01)
)
cve.grid.search(X, Y, k, args)
}

View File

@ -1,16 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cve_linesearch.R
\name{cve_linesearch}
\alias{cve_linesearch}
\title{Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe
conditions.}
\usage{
cve_linesearch(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 1,
tol = 0.001, rho1 = 0.1, rho2 = 0.9, slack = 0, epochs = 50L,
attempts = 10L, max.linesearch.iter = 10L, logger = NULL)
}
\description{
Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe
conditions.
}
\keyword{internal}

View File

@ -1,16 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cve_sgd.R
\name{cve_sgd}
\alias{cve_sgd}
\title{Implementation of the CVE method using stochastic gradient descent
(SGD) with mini-batches.}
\usage{
cve_sgd(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 0.01,
tol = 0.001, epochs = 50L, batch.size = 16L, attempts = 10L,
logger = NULL)
}
\description{
Implementation of the CVE method using stochastic gradient descent (SGD)
with mini-batches of size \code{batch.size}.
}
\keyword{internal}

View File

@ -1,16 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cve_simple.R
\name{cve_simple}
\alias{cve_simple}
\title{Simple implementation of the CVE method. 'Simple' means that this method is
a classic GD method using no further tricks.}
\usage{
cve_simple(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 1,
tol = 0.001, slack = 0, epochs = 50L, attempts = 10L,
logger = NULL)
}
\description{
Simple implementation of the CVE method. 'Simple' means that this method is
a classic GD method using no further tricks.
}
\keyword{internal}

View File

@ -1,64 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/datasets.R
\name{dataset}
\alias{dataset}
\title{Generates test datasets.}
\usage{
dataset(name = "M1", n, B, p.mix = 0.3, lambda = 1)
}
\arguments{
\item{name}{One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"} or \code{"M5"}}
\item{n}{number of samples}
\item{p.mix}{Only for \code{"M4"}, see: below.}
\item{lambda}{Only for \code{"M4"}, see: below.}
\item{p}{Dim. of random variable \code{X}.}
}
\value{
List with elements
\itemize{
\item{X}{data}
\item{Y}{response}
\item{B}{Used dim-reduction matrix}
\item{name}{Name of the dataset (name parameter)}
}
}
\description{
Provides sample datasets. There are 5 different datasets named
M1, M2, M3, M4 and M5 described in the paper references below.
The general model is given by:
\deqn{Y \sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon}
}
\section{M1}{
The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace
dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points.
The link function \eqn{g} is given as
\deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + 0.5\epsilon}{g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + 0.5 epsilon}
}
\section{M2}{
\eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a default of \eqn{n = 200} data points.
The link function \eqn{g} is given as
\deqn{g(x) = x_1 x_2^2 + 0.5\epsilon}{g(x) = x_1 x_2^2 + 0.5 epsilon}
}
\section{M3}{
TODO:
}
\section{M4}{
TODO:
}
\section{M5}{
TODO:
}
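
# A minimal sketch (not from the original file) of the M1 model above,
# assuming Sigma = I_p and p = 12 (neither is specified here); k = 2 and
# the default n = 200 follow the description.
n <- 200; p <- 12
B <- cbind(c(1, rep(0, p - 1)), c(0, 1, rep(0, p - 2)))  # orthonormal (p, 2)
X <- matrix(rnorm(n * p), n, p)                          # X ~ N_p(0, I_p)
XB <- X %*% B
Y <- XB[, 1] / (0.5 + (XB[, 2] + 1.5)^2) + 0.5 * rnorm(n)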

View File

@ -1,23 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/util.R
\name{elem.pairs}
\alias{elem.pairs}
\title{Creates a (numeric) matrix where each column contains
an element to element matching.}
\usage{
elem.pairs(elements)
}
\arguments{
\item{elements}{numeric vector of elements to match}
}
\value{
matrix of size `(2, n * (n - 1) / 2)` for an argument of length `n`.
}
\description{
Creates a (numeric) matrix where each column contains
an element to element matching.
}
\examples{
elem.pairs(seq.int(2, 5))
}
\keyword{internal}

View File

@ -1,28 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/estimateBandwidth.R
\name{estimate.bandwidth}
\alias{estimate.bandwidth}
\title{Estimated bandwidth for CVE.}
\usage{
estimate.bandwidth(X, k, nObs)
}
\arguments{
\item{X}{data matrix of dimension (n x p) with n data points X_i of dimension
p. Therefore each row represents a data point of dimension p.}
\item{k}{Guess for rank(B).}
\item{nObs}{Either numeric or a function. If specified as a numeric value
it is used in the computation of the bandwidth directly. If it is a function,
`nObs` is evaluated as \code{nObs(nrow(x))}. The default behaviour if not
supplied is to use \code{nObs <- nrow(x)^0.5}.}
}
\description{
Estimates a proper bandwidth \code{h} according to
\deqn{%
h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{%
h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p}
}
\seealso{
[\code{\link{qchisq}}]
}
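
# A minimal sketch (not from the original file) of the bandwidth formula
# above, assuming Sigma is replaced by the sample covariance of X; the
# function name is hypothetical.
estimate.bandwidth.sketch <- function(X, k, nObs = sqrt(nrow(X))) {
    n <- nrow(X)
    p <- ncol(X)
    qchisq((nObs - 1) / (n - 1), p - k) * 2 * sum(diag(cov(X))) / p
}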

View File

@ -1,31 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/gradient.R
\name{grad}
\alias{grad}
\title{Compute the gradient of `L(V)` given a dataset `X`.}
\usage{
grad(X, Y, V, h, loss.out = FALSE, loss.only = FALSE,
persistent = FALSE)
}
\arguments{
\item{X}{Data matrix.}
\item{Y}{Response.}
\item{V}{Position to compute the gradient at, i.e. a point on the Stiefel manifold.}
\item{h}{Bandwidth.}
\item{loss.out}{If \code{TRUE} the loss will be written to the parent environment.}
\item{loss.only}{Boolean to only compute the loss, if \code{TRUE} a single
value loss is returned and \code{envir} is ignored.}
\item{persistent}{Determines if data indices and dependent calculations shall
be reused from the parent environment. ATTENTION: Do NOT set this flag, it is
only intended for internal usage by carefully aligned functions!}
}
\description{
Compute the gradient of `L(V)` given a dataset `X`.
}
\keyword{internal}
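
# A minimal usage sketch (not from the original file) on random data; the
# `CVEpureR:::` access and the bandwidth value h = 0.5 are assumptions for
# illustration only.
library(CVEpureR)
n <- 100; p <- 6; q <- 4
X <- matrix(rnorm(n * p), n, p)
Y <- rnorm(n)
V <- qr.Q(qr(matrix(rnorm(p * q), p, q)))
G <- CVEpureR:::grad(X, Y, V, h = 0.5)  # (p, q) gradient of L at V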

View File

@ -1,18 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/util.R
\name{null}
\alias{null}
\title{Null space basis of given matrix `V`}
\usage{
null(V)
}
\arguments{
\item{V}{`(p, q)` matrix}
}
\value{
Semi-orthogonal `(p, p - q)` matrix spanning the null space of `V`.
}
\description{
Null space basis of given matrix `V`
}
\keyword{internal}

View File

@ -1,28 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/CVE.R
\name{plot.cve}
\alias{plot.cve}
\title{Plotting helper for objects of class \code{cve}.}
\usage{
## S3 method for class 'cve'
plot(x, content = "history", ...)
}
\arguments{
\item{x}{Object of class \code{cve} (result of [cve()]).}
\item{...}{Pass through parameters to [plot()] and [lines()]}
\item{content}{Specifies what to plot:
\itemize{
\item "history" Plots the loss history from stiefel optimization
(default).
\item ... TODO: add (if there are any)
}}
}
\description{
Plotting helper for objects of class \code{cve}.
}
\seealso{
see \code{\link{par}} for graphical parameters to pass through
as well as \code{\link{plot}} for standard plot utility.
}

View File

@ -1,22 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/util.R
\name{rStiefel}
\alias{rStiefel}
\title{Samples uniformly from the Stiefel manifold}
\usage{
rStiefel(p, q)
}
\arguments{
\item{p}{row dim.}
\item{q}{col dim.}
}
\value{
`(p, q)` semi-orthogonal matrix
}
\description{
Samples uniformly from the Stiefel manifold
}
\examples{
V <- rStiefel(6, 4)
}

View File

@ -1,14 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/CVE.R
\name{summary.cve}
\alias{summary.cve}
\title{Prints a summary of a \code{cve} result.}
\usage{
\method{summary}{cve}(object, ...)
}
\arguments{
\item{object}{Instance of 'cve' as return of \code{cve}.}
}
\description{
Prints a summary of a \code{cve} result.
}

View File

@ -1,479 +0,0 @@
library(microbenchmark)
dyn.load("benchmark.so")
## rowSum* .call --------------------------------------------------------------
rowSums.c <- function(M) {
stopifnot(
is.matrix(M),
is.numeric(M)
)
if (!is.double(M)) {
M <- matrix(as.double(M), nrow = nrow(M))
}
.Call('R_rowSums', PACKAGE = 'benchmark', M)
}
rowSumsV2.c <- function(M) {
stopifnot(
is.matrix(M),
is.numeric(M)
)
if (!is.double(M)) {
M <- matrix(as.double(M), nrow = nrow(M))
}
.Call('R_rowSumsV2', PACKAGE = 'benchmark', M)
}
rowSumsV3.c <- function(M) {
stopifnot(
is.matrix(M),
is.numeric(M)
)
if (!is.double(M)) {
M <- matrix(as.double(M), nrow = nrow(M))
}
.Call('R_rowSumsV3', PACKAGE = 'benchmark', M)
}
colSums.c <- function(M) {
stopifnot(
is.matrix(M),
is.numeric(M)
)
if (!is.double(M)) {
M <- matrix(as.double(M), nrow = nrow(M))
}
.Call('R_colSums', PACKAGE = 'benchmark', M)
}
rowSquareSums.c <- function(M) {
stopifnot(
is.matrix(M),
is.numeric(M)
)
if (!is.double(M)) {
M <- matrix(as.double(M), nrow = nrow(M))
}
.Call('R_rowSquareSums', PACKAGE = 'benchmark', M)
}
rowSumsSymVec.c <- function(vecA, nrow, diag = 0.0) {
stopifnot(
is.vector(vecA),
is.numeric(vecA),
is.numeric(diag),
nrow * (nrow - 1) == length(vecA) * 2
)
if (!is.double(vecA)) {
vecA <- as.double(vecA)
}
.Call('R_rowSumsSymVec', PACKAGE = 'benchmark',
vecA, as.integer(nrow), as.double(diag))
}
rowSweep.c <- function(A, v, op = '-') {
stopifnot(
is.matrix(A),
is.numeric(v)
)
if (!is.double(A)) {
A <- matrix(as.double(A), nrow = nrow(A))
}
if (!is.vector(v) || !is.double(v)) {
v <- as.double(v)
}
stopifnot(
nrow(A) == length(v),
op %in% c('+', '-', '*', '/')
)
.Call('R_rowSweep', PACKAGE = 'benchmark', A, v, op)
}
## row*, col* tests ------------------------------------------------------------
n <- 3000
M <- matrix(runif(n * 12), n, 12)
stopifnot(
all.equal(rowSums(M^2), rowSums.c(M^2)),
all.equal(colSums(M), colSums.c(M)),
all.equal(rowSums(M), rowSumsV2.c(M)),
all.equal(rowSums(M), rowSumsV3.c(M))
)
microbenchmark(
rowSums = rowSums(M),
rowSums.c = rowSums.c(M),
rowSumsV2.c = rowSumsV2.c(M),
rowSumsV3.c = rowSumsV3.c(M)
)
microbenchmark(
rowSums = rowSums(M^2),
rowSums.c = rowSums.c(M^2),
rowSqSums.c = rowSquareSums.c(M)
)
microbenchmark(
colSums = colSums(M),
colSums.c = colSums.c(M)
)
sum = rowSums(M)
stopifnot(all.equal(
sweep(M, 1, sum, FUN = `/`),
    rowSweep.c(M, sum, '/') # row-normalize
), all.equal(
sweep(M, 1, sum, FUN = `/`),
M / sum
))
microbenchmark(
sweep = sweep(M, 1, sum, FUN = `/`),
M / sum,
    rowSweep.c = rowSweep.c(M, sum, '/') # row-normalize
)
# Create symmetric matrix with constant diagonal entries.
nrow <- 200
diag <- 1.0
Sym <- tcrossprod(runif(nrow))
diag(Sym) <- diag
# Get vectorized lower triangular part of `Sym` matrix.
SymVec <- Sym[lower.tri(Sym)]
stopifnot(all.equal(
rowSums(Sym),
rowSumsSymVec.c(SymVec, nrow, diag)
))
microbenchmark(
rowSums = rowSums(Sym),
rowSums.c = rowSums.c(Sym),
rowSumsSymVec.c = rowSumsSymVec.c(SymVec, nrow, diag)
)
## Matrix-Matrix operation .call ---------------------------------------------
transpose.c <- function(A) {
stopifnot(
is.matrix(A), is.numeric(A)
)
if (!is.double(A)) {
A <- matrix(as.double(A), nrow(A), ncol(A))
}
.Call('R_transpose', PACKAGE = 'benchmark', A)
}
sympMV.c <- function(vecA, x) {
stopifnot(
is.vector(vecA), is.numeric(vecA),
is.vector(x), is.numeric(x),
length(x) * (length(x) + 1) == 2 * length(vecA)
)
if (!is.double(vecA)) {
vecA <- as.double(vecA)
}
if (!is.double(x)) {
x <- as.double(x)
}
.Call('R_sympMV', PACKAGE = 'benchmark', vecA, x)
}
matrixprod.c <- function(A, B) {
stopifnot(
is.matrix(A), is.numeric(A),
is.matrix(B), is.numeric(B),
ncol(A) == nrow(B)
)
if (!is.double(A)) {
A <- matrix(as.double(A), nrow = nrow(A))
}
if (!is.double(B)) {
B <- matrix(as.double(B), nrow = nrow(B))
}
.Call('R_matrixprod', PACKAGE = 'benchmark', A, B)
}
crossprod.c <- function(A, B) {
stopifnot(
is.matrix(A), is.numeric(A),
is.matrix(B), is.numeric(B),
nrow(A) == nrow(B)
)
if (!is.double(A)) {
A <- matrix(as.double(A), nrow = nrow(A))
}
if (!is.double(B)) {
B <- matrix(as.double(B), nrow = nrow(B))
}
.Call('R_crossprod', PACKAGE = 'benchmark', A, B)
}
kronecker.c <- function(A, B, op = '*') {
stopifnot(
is.matrix(A), is.numeric(A),
is.matrix(B), is.numeric(B),
is.character(op), op %in% c('*', '+', '/', '-')
)
if (!is.double(A)) {
A <- matrix(as.double(A), nrow = nrow(A))
}
if (!is.double(B)) {
B <- matrix(as.double(B), nrow = nrow(B))
}
.Call('R_kronecker', PACKAGE = 'benchmark', A, B, op)
}
skewSymRank2k.c <- function(A, B, alpha = 1, beta = 0) {
stopifnot(
is.matrix(A), is.numeric(A),
is.matrix(B), is.numeric(B),
all(dim(A) == dim(B)),
is.numeric(alpha), length(alpha) == 1L,
is.numeric(beta), length(beta) == 1L
)
if (!is.double(A)) {
A <- matrix(as.double(A), nrow = nrow(A))
}
if (!is.double(B)) {
B <- matrix(as.double(B), nrow = nrow(B))
}
.Call('R_skewSymRank2k', PACKAGE = 'benchmark', A, B,
as.double(alpha), as.double(beta))
}
## Matrix-Matrix operation tests ---------------------------------------------
n <- 200
k <- 100
m <- 300
A <- matrix(runif(n * k), n, k)
B <- matrix(runif(k * m), k, m)
stopifnot(
all.equal(t(A), transpose.c(A))
)
microbenchmark(
t(A),
transpose.c(A)
)
Sym <- tcrossprod(runif(n))
vecSym <- Sym[lower.tri(Sym, diag = T)]
x <- runif(n)
stopifnot(all.equal(
as.double(Sym %*% x),
sympMV.c(vecSym, x)
))
microbenchmark(
Sym %*% x,
sympMV.c = sympMV.c(vecSym, x)
)
stopifnot(
all.equal(A %*% B, matrixprod.c(A, B))
)
microbenchmark(
"%*%" = A %*% B,
matrixprod.c = matrixprod.c(A, B)
)
A <- matrix(runif(k * n), k, n)
B <- matrix(runif(k * m), k, m)
stopifnot(
all.equal(crossprod(A, B), crossprod.c(A, B))
)
microbenchmark(
crossprod = crossprod(A, B),
crossprod.c = crossprod.c(A, B)
)
n <- 100L
m <- 12L
p <- 11L
q <- 10L
A <- matrix(runif(n * m), n, m)
B <- matrix(runif(p * q), p, q)
stopifnot(all.equal(
kronecker(A, B),
kronecker.c(A, B)
))
microbenchmark(
kronecker = kronecker(A, B),
kronecker.c = kronecker.c(A, B)
)
n <- 12
k <- 11
A <- matrix(runif(n * k), n, k)
B <- matrix(runif(n * k), n, k)
stopifnot(all.equal(
A %*% t(B) - B %*% t(A), skewSymRank2k.c(A, B)
))
microbenchmark(
A %*% t(B) - B %*% t(A),
skewSymRank2k.c(A, B)
)
## Orthogonal projection onto null space .Call --------------------------------
nullProj.c <- function(B) {
stopifnot(
is.matrix(B), is.numeric(B)
)
if (!is.double(B)) {
B <- matrix(as.double(B), nrow = nrow(B))
}
.Call('R_nullProj', PACKAGE = 'benchmark', B)
}
## Orthogonal projection onto null space tests --------------------------------
p <- 12
q <- 10
V <- qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q)))
# Projection matrix onto `span(V)`
Q <- diag(1, p) - tcrossprod(V, V)
stopifnot(
all.equal(Q, nullProj.c(V))
)
microbenchmark(
nullProj = diag(1, p) - tcrossprod(V, V),
nullProj.c = nullProj.c(V)
)
# ## Kronecker optimizations ----------------------------------------------------
# library(microbenchmark)
# dist.1 <- function(X_diff, Q) {
# rowSums((X_diff %*% Q)^2)
# }
# dist.2 <- function(X, Q) {
# ones <- rep(1, nrow(X))
# proj <- X %*% Q
# rowSums((kronecker(proj, ones) - kronecker(ones, proj))^2)
# }
# n <- 400L
# p <- 12L
# k <- 2L
# q <- p - k
# X <- matrix(rnorm(n * p), n, p)
# Q <- diag(1, p) - tcrossprod(rnorm(p))
# ones <- rep(1, n)
# X_diff <- kronecker(X, ones) - kronecker(ones, X)
# stopifnot(all.equal(dist.1(X_diff, Q), dist.2(X, Q)))
# microbenchmark(
# dist.1(X_diff, Q),
# dist.2(X, Q),
# times = 10L
# )
# # if (!persistent) {
# # pair.index <- elem.pairs(seq(n))
# # i <- pair.index[, 1] # `i` indices of `(i, j)` pairs
# # j <- pair.index[, 2] # `j` indices of `(i, j)` pairs
# # lower <- ((i - 1) * n) + j
# # upper <- ((j - 1) * n) + i
# # X_diff <- X[i, , drop = F] - X[j, , drop = F]
# # }
# # # Projection matrix onto `span(V)`
# # Q <- diag(1, p) - tcrossprod(V, V)
# # # Vectorized distance matrix `D`.
# # vecD <- rowSums((X_diff %*% Q)^2)
# ## WIP for gradient. ----------------------------------------------------------
grad.c <- function(X, X_diff, Y, V, h) {
stopifnot(
is.matrix(X), is.double(X),
is.matrix(X_diff), is.double(X_diff),
ncol(X_diff) == ncol(X), nrow(X_diff) == nrow(X) * (nrow(X) - 1) / 2,
        is.vector(Y) || (is.matrix(Y) && min(dim(Y)) == 1L), is.double(Y),
length(Y) == nrow(X),
is.matrix(V), is.double(V),
nrow(V) == ncol(X),
is.vector(h), is.numeric(h), length(h) == 1
)
.Call('R_grad', PACKAGE = 'benchmark',
X, X_diff, as.double(Y), V, as.double(h));
}
elem.pairs <- function(elements) {
# Number of elements to match.
n <- length(elements)
# Create all combinations.
pairs <- rbind(rep(elements, each=n), rep(elements, n))
# Select unique combinations without self interaction.
return(pairs[, pairs[1, ] < pairs[2, ]])
}
grad <- function(X, Y, V, h, persistent = TRUE) {
n <- nrow(X)
p <- ncol(X)
if (!persistent) {
pair.index <- elem.pairs(seq(n))
        i <- pair.index[1, ]  # `i` indices of `(i, j)` pairs
        j <- pair.index[2, ]  # `j` indices of `(i, j)` pairs
lower <- ((i - 1) * n) + j
upper <- ((j - 1) * n) + i
X_diff <- X[i, , drop = F] - X[j, , drop = F]
}
# Projection matrix onto `span(V)`
Q <- diag(1, p) - tcrossprod(V, V)
# Vectorized distance matrix `D`.
vecD <- rowSums((X_diff %*% Q)^2)
# Create Kernel matrix (aka. apply kernel to distances)
K <- matrix(1, n, n) # `exp(0) == 1`
K[lower] <- exp((-0.5 / h) * vecD^2) # Set lower tri. part
K[upper] <- t(K)[upper] # Mirror lower tri. to upper
# Weighted `Y` momentums
colSumsK <- colSums(K)
y1 <- (K %*% Y) / colSumsK
y2 <- (K %*% Y^2) / colSumsK
# Per example loss `L(V, X_i)`
L <- y2 - y1^2
# Compute scaling vector `vecS` for `X_diff`.
tmp <- kronecker(matrix(y1, n, 1), matrix(Y, 1, n), `-`)^2
tmp <- as.vector(L) - tmp
tmp <- tmp * K / colSumsK
vecS <- (tmp + t(tmp))[lower] * vecD
G <- crossprod(X_diff, X_diff * vecS) %*% V
G <- (-2 / (n * h^2)) * G
return(G)
}
rStiefel <- function(p, q) {
return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))))
}
n <- 200
p <- 12
q <- 10
X <- matrix(runif(n * p), n, p)
Y <- runif(n)
V <- rStiefel(p, q)
h <- 0.1
pair.index <- elem.pairs(seq(n))
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
lower <- ((i - 1) * n) + j
upper <- ((j - 1) * n) + i
X_diff <- X[i, , drop = F] - X[j, , drop = F]
stopifnot(all.equal(
grad(X, Y, V, h),
grad.c(X, X_diff, Y, V, h)
))
microbenchmark(
grad = grad(X, Y, V, h),
grad.c = grad.c(X, X_diff, Y, V, h)
)

View File

@ -1,510 +0,0 @@
#include <stdlib.h>
#include <string.h> // for `mem*` functions.
#include <R_ext/BLAS.h>
#include <R_ext/Lapack.h>
#include <R_ext/Error.h>
// #include <Rmath.h>
#include "benchmark.h"
void rowSums(const double *A,
const int nrow, const int ncol,
double *sum) {
int i, j, block_size, block_size_i;
const double *A_block = A;
const double *A_end = A + nrow * ncol;
if (nrow > 508) {
block_size = 508;
} else {
block_size = nrow;
}
// Iterate `(block_size_i, ncol)` submatrix blocks.
for (i = 0; i < nrow; i += block_size_i) {
// Reset `A` to new block beginning.
A = A_block;
        // Take block size of everything left and reduce to max size.
block_size_i = nrow - i;
if (block_size_i > block_size) {
block_size_i = block_size;
}
        // Copy block's first column.
        for (j = 0; j + 3 < block_size_i; j += 4) {
sum[j] = A[j];
sum[j + 1] = A[j + 1];
sum[j + 2] = A[j + 2];
sum[j + 3] = A[j + 3];
}
for (; j < block_size_i; ++j) {
sum[j] = A[j];
}
// Sum following columns to the first one.
for (A += nrow; A < A_end; A += nrow) {
            for (j = 0; j + 3 < block_size_i; j += 4) {
sum[j] += A[j];
sum[j + 1] += A[j + 1];
sum[j + 2] += A[j + 2];
sum[j + 3] += A[j + 3];
}
for (; j < block_size_i; ++j) {
sum[j] += A[j];
}
}
// Step one block forth.
A_block += block_size_i;
sum += block_size_i;
}
}
void rowSumsV2(const double *A,
const int nrow, const int ncol,
double *sum) {
int i, j, block_size, block_size_i;
const double *A_block = A;
const double *A_end = A + nrow * ncol;
if (nrow > CVE_MEM_CHUNK_SIZE) {
block_size = CVE_MEM_CHUNK_SIZE;
} else {
block_size = nrow;
}
// Iterate `(block_size_i, ncol)` submatrix blocks.
for (i = 0; i < nrow; i += block_size_i) {
// Reset `A` to new block beginning.
A = A_block;
        // Take block size of everything left and reduce to max size.
block_size_i = nrow - i;
if (block_size_i > block_size) {
block_size_i = block_size;
}
// Compute first blocks column,
for (j = 0; j < block_size_i; ++j) {
sum[j] = A[j];
}
// and sum the following columns to the first one.
for (A += nrow; A < A_end; A += nrow) {
for (j = 0; j < block_size_i; ++j) {
sum[j] += A[j];
}
}
// Step one block forth.
A_block += block_size_i;
sum += block_size_i;
}
}
void rowSumsV3(const double *A,
const int nrow, const int ncol,
double *sum) {
    int i;
    double *ones = (double*)malloc(ncol * sizeof(double));
for (i = 0; i < ncol; ++i) {
ones[i] = 1.0;
}
matrixprod(A, nrow, ncol, ones, ncol, 1, sum);
free(ones);
}
void colSums(const double *A, const int nrow, const int ncol,
double *sums) {
    int i, j, nrowb = 4 * (nrow / 4); // largest multiple of 4 with nrowb <= nrow.
double sum;
for (j = 0; j < ncol; ++j) {
sum = 0.0;
for (i = 0; i < nrowb; i += 4) {
sum += A[i]
+ A[i + 1]
+ A[i + 2]
+ A[i + 3];
}
for (; i < nrow; ++i) {
sum += A[i];
}
*(sums++) = sum;
A += nrow;
}
}
void rowSquareSums(const double *A,
const int nrow, const int ncol,
double *sum) {
int i, j, block_size, block_size_i;
const double *A_block = A;
const double *A_end = A + nrow * ncol;
if (nrow > 508) {
block_size = 508;
} else {
block_size = nrow;
}
// Iterate `(block_size_i, ncol)` submatrix blocks.
for (i = 0; i < nrow; i += block_size_i) {
// Reset `A` to new block beginning.
A = A_block;
        // Take block size of everything left and reduce to max size.
block_size_i = nrow - i;
        if (block_size_i > block_size) {
            block_size_i = block_size;
        }
        // Square and copy block's first column.
        for (j = 0; j + 3 < block_size_i; j += 4) {
sum[j] = A[j] * A[j];
sum[j + 1] = A[j + 1] * A[j + 1];
sum[j + 2] = A[j + 2] * A[j + 2];
sum[j + 3] = A[j + 3] * A[j + 3];
}
for (; j < block_size_i; ++j) {
sum[j] = A[j] * A[j];
}
// Sum following columns to the first one.
for (A += nrow; A < A_end; A += nrow) {
            for (j = 0; j + 3 < block_size_i; j += 4) {
sum[j] += A[j] * A[j];
sum[j + 1] += A[j + 1] * A[j + 1];
sum[j + 2] += A[j + 2] * A[j + 2];
sum[j + 3] += A[j + 3] * A[j + 3];
}
for (; j < block_size_i; ++j) {
sum[j] += A[j] * A[j];
}
}
// Step one block forth.
A_block += block_size_i;
sum += block_size_i;
}
}
void rowSumsSymVec(const double *Avec, const int nrow,
const double diag,
double *sum) {
int i, j;
if (diag == 0.0) {
memset(sum, 0, nrow * sizeof(double));
} else {
for (i = 0; i < nrow; ++i) {
sum[i] = diag;
}
}
for (j = 0; j < nrow; ++j) {
for (i = j + 1; i < nrow; ++i, ++Avec) {
sum[j] += *Avec;
sum[i] += *Avec;
}
}
}
#define ROW_SWEEP_ALG(op) \
/* Iterate `(block_size_i, ncol)` submatrix blocks. */ \
for (i = 0; i < nrow; i += block_size_i) { \
/* Set `A` and `C` to block beginning. */ \
A = A_block; \
C = C_block; \
/* Get current block's row size. */ \
block_size_i = nrow - i; \
if (block_size_i > block_size) { \
block_size_i = block_size; \
} \
/* Perform element wise operation for block. */ \
for (; A < A_end; A += nrow, C += nrow) { \
for (j = 0; j < block_size_i; ++j) { \
C[j] = (A[j]) op (v[j]); \
} \
} \
/* Step one block forth. */ \
A_block += block_size_i; \
C_block += block_size_i; \
v += block_size_i; \
}
/* C[, j] = A[, j] * v for each j = 1 to ncol */
void rowSweep(const double *A, const int nrow, const int ncol,
const char* op,
const double *v, // vector of length nrow
double *C) {
int i, j, block_size, block_size_i;
const double *A_block = A;
double *C_block = C;
const double *A_end = A + nrow * ncol;
if (nrow > CVE_MEM_CHUNK_SMALL) { // small because 3 vectors in cache
block_size = CVE_MEM_CHUNK_SMALL;
} else {
block_size = nrow;
}
if (*op == '+') {
ROW_SWEEP_ALG(+)
} else if (*op == '-') {
ROW_SWEEP_ALG(-)
} else if (*op == '*') {
ROW_SWEEP_ALG(*)
} else if (*op == '/') {
ROW_SWEEP_ALG(/)
} else {
error("Got unknown 'op' (opperation) argument.");
}
}
void transpose(const double *A, const int nrow, const int ncol, double* T) {
int i, j, len = nrow * ncol;
// Filling column-wise and accessing row-wise.
for (i = 0, j = 0; i < len; ++i, j += nrow) {
if (j >= len) {
j -= len - 1;
}
T[i] = A[j];
}
}
// Symmetric Packed matrix vector product.
// Computes
// y <- Ax
// where A is supplied as packed lower triangular part of a symmetric
// matrix A. Meaning that `vecA` is `vec_ltri(A)`.
void sympMV(const double* vecA, const int nrow, const double* x, double* y) {
double one = 1.0;
double zero = 0.0;
int onei = 1;
F77_NAME(dspmv)("L", &nrow, &one, vecA, x, &onei, &zero, y, &onei);
}
void matrixprod(const double *A, const int nrowA, const int ncolA,
const double *B, const int nrowB, const int ncolB,
double *C) {
const double one = 1.0;
const double zero = 0.0;
// DGEMM with parameterization:
// C <- A %*% B
F77_NAME(dgemm)("N", "N", &nrowA, &ncolB, &ncolA,
&one, A, &nrowA, B, &nrowB,
&zero, C, &nrowA);
}
void crossprod(const double *A, const int nrowA, const int ncolA,
const double *B, const int nrowB, const int ncolB,
double *C) {
const double one = 1.0;
const double zero = 0.0;
// DGEMM with parameterization:
// C <- t(A) %*% B
F77_NAME(dgemm)("T", "N", &ncolA, &ncolB, &nrowA,
&one, A, &nrowA, B, &nrowB,
&zero, C, &ncolA);
}
#define KRONECKER_ALG(op) \
for (j = 0; j < ncolA; ++j) { \
for (l = 0; l < ncolB; ++l) { \
colB = B + (l * nrowB); \
for (i = 0; i < nrowA; ++i) { \
for (k = 0; k < nrowB; ++k) { \
*(C++) = (A[i]) op (colB[k]); \
} \
} \
} \
A += nrowA; \
}
void kronecker(const double * restrict A, const int nrowA, const int ncolA,
const double * restrict B, const int nrowB, const int ncolB,
const char* op,
double * restrict C) {
int i, j, k, l;
const double *colB;
if (*op == '+') {
KRONECKER_ALG(+)
} else if (*op == '-') {
KRONECKER_ALG(-)
} else if (*op == '*') {
KRONECKER_ALG(*)
} else if (*op == '/') {
KRONECKER_ALG(/)
} else {
error("Got unknown 'op' (opperation) argument.");
}
}
void nullProj(const double *B, const int nrowB, const int ncolB,
double *Q) {
const double minusOne = -1.0;
const double one = 1.0;
// Initialize Q as identity matrix.
memset(Q, 0, sizeof(double) * nrowB * nrowB);
double *Q_diag, *Q_end = Q + nrowB * nrowB;
for (Q_diag = Q; Q_diag < Q_end; Q_diag += nrowB + 1) {
*Q_diag = 1.0;
}
// DGEMM with parameterization:
// Q <- (-1.0 * B %*% t(B)) + Q
F77_NAME(dgemm)("N", "T", &nrowB, &nrowB, &ncolB,
&minusOne, B, &nrowB, B, &nrowB,
&one, Q, &nrowB);
}
void rangePairs(const int from, const int to, int *pairs) {
int i, j;
for (i = from; i < to; ++i) {
for (j = i + 1; j < to; ++j) {
pairs[0] = i;
pairs[1] = j;
pairs += 2;
}
}
}
// A dense skew-symmetric rank 2k update.
// Performs the update
//      C := alpha (A * B^T - B * A^T) + beta C
void skewSymRank2k(const int nrow, const int ncol,
double alpha, const double *A, const double *B,
double beta,
double *C) {
F77_NAME(dgemm)("N", "T",
&nrow, &nrow, &ncol,
&alpha, A, &nrow, B, &nrow,
&beta, C, &nrow);
alpha *= -1.0;
beta = 1.0;
F77_NAME(dgemm)("N", "T",
&nrow, &nrow, &ncol,
&alpha, B, &nrow, A, &nrow,
&beta, C, &nrow);
}
// Gaussian kernel evaluated as exp(scale * x^2); `scale` is expected to be -0.5 / h.
static inline double gaussKernel(const double x, const double scale) {
return exp(scale * x * x);
}
// TODO: much potential for optimization!
static void weightedYandLoss(const int n,
const double *Y,
const double *vecD,
const double *vecW,
const double *colSums,
double *y1, double *L, double *vecS,
double *loss) {
int i, j, k, N = n * (n - 1) / 2;
double l;
for (i = 0; i < n; ++i) {
y1[i] = Y[i];
L[i] = Y[i] * Y[i];
}
for (k = j = 0; j < n; ++j) {
for (i = j + 1; i < n; ++i, ++k) {
y1[i] += Y[j] * vecW[k];
y1[j] += Y[i] * vecW[k];
L[i] += Y[j] * Y[j] * vecW[k];
L[j] += Y[i] * Y[i] * vecW[k];
}
}
for (i = 0; i < n; ++i) {
y1[i] /= colSums[i];
L[i] /= colSums[i];
}
l = 0.0;
for (i = 0; i < n; ++i) {
l += (L[i] -= y1[i] * y1[i]);
}
*loss = l / (double)n;
for (k = j = 0; j < n; ++j) {
for (i = j + 1; i < n; ++i, ++k) {
l = Y[j] - y1[i];
vecS[k] = (L[i] - (l * l)) / colSums[i];
l = Y[i] - y1[j];
vecS[k] += (L[j] - (l * l)) / colSums[j];
}
}
for (k = 0; k < N; ++k) {
vecS[k] *= vecW[k] * vecD[k];
}
}
void grad(const int n, const int p, const int q,
const double *X,
const double *X_diff,
const double *Y,
const double *V,
const double h,
double *G, double *loss) {
    // Number of non-trivial X_i to X_j pairs.
int i, N = (n * (n - 1)) / 2;
double scale = -0.5 / h;
if (X_diff == (void*)0) {
// TODO: ...
}
// Allocate and compute projection matrix `Q = I_p - V * V^T`
double *Q = (double*)malloc(p * p * sizeof(double));
nullProj(V, p, q, Q);
// allocate and compute vectorized distance matrix with a temporary
// projection of `X_diff`.
double *vecD = (double*)malloc(N * sizeof(double));
double *X_proj;
if (p < 5) { // TODO: refine that!
X_proj = (double*)malloc(N * 5 * sizeof(double));
} else {
X_proj = (double*)malloc(N * p * sizeof(double));
}
matrixprod(X_diff, N, p, Q, p, p, X_proj);
rowSquareSums(X_proj, N, p, vecD);
    // Apply kernel to the distance vector for weights computation.
double *vecK = X_proj; // reuse memory area, no longer needed.
for (i = 0; i < N; ++i) {
vecK[i] = gaussKernel(vecD[i], scale);
}
double *colSums = X_proj + N; // still allocated!
rowSumsSymVec(vecK, n, 1.0, colSums); // rowSums = colSums cause Sym
    // Compute weighted responses of first and second moments, aka y1, y2.
double *y1 = X_proj + N + n;
double *L = X_proj + N + (2 * n);
    // Allocate X_diff scaling vector `vecS`, not in the `X_proj` mem area
    // because it is used simultaneously with X_proj in the final gradient computation.
double *vecS = (double*)malloc(N * sizeof(double));
weightedYandLoss(n, Y, vecD, vecK, colSums, y1, L, vecS, loss);
    // Compute the gradient using X_proj for the intermediate scaled X_diff.
rowSweep(X_diff, N, p, "*", vecS, X_proj);
// reuse Q which has the required dim (p, p).
crossprod(X_diff, N, p, X_proj, N, p, Q);
// Product with V
matrixprod(Q, p, p, V, p, q, G);
// And final scaling (TODO: move into matrixprod!)
scale = -2.0 / (((double)n) * h * h);
N = p * q;
for (i = 0; i < N; ++i) {
G[i] *= scale;
}
free(vecS);
free(X_proj);
free(vecD);
free(Q);
}

View File

@ -1,219 +0,0 @@
#ifndef CVE_INCLUDE_GUARD_
#define CVE_INCLUDE_GUARD_
#include <Rinternals.h>
#define CVE_MEM_CHUNK_SMALL 1016
#define CVE_MEM_CHUNK_SIZE 2032
void rowSums(const double *A,
const int nrow, const int ncol,
double *sum);
SEXP R_rowSums(SEXP A) {
SEXP sums = PROTECT(allocVector(REALSXP, nrows(A)));
rowSums(REAL(A), nrows(A), ncols(A), REAL(sums));
UNPROTECT(1);
return sums;
}
void rowSumsV2(const double *A,
const int nrow, const int ncol,
double *sum);
SEXP R_rowSumsV2(SEXP A) {
SEXP sums = PROTECT(allocVector(REALSXP, nrows(A)));
rowSumsV2(REAL(A), nrows(A), ncols(A), REAL(sums));
UNPROTECT(1);
return sums;
}
void rowSumsV3(const double *A,
const int nrow, const int ncol,
double *sum);
SEXP R_rowSumsV3(SEXP A) {
SEXP sums = PROTECT(allocVector(REALSXP, nrows(A)));
rowSumsV3(REAL(A), nrows(A), ncols(A), REAL(sums));
UNPROTECT(1);
return sums;
}
void colSums(const double *A,
const int nrow, const int ncol,
double *sum);
SEXP R_colSums(SEXP A) {
SEXP sums = PROTECT(allocVector(REALSXP, ncols(A)));
colSums(REAL(A), nrows(A), ncols(A), REAL(sums));
UNPROTECT(1);
return sums;
}
void rowSquareSums(const double*, const int, const int, double*);
SEXP R_rowSquareSums(SEXP A) {
SEXP result = PROTECT(allocVector(REALSXP, nrows(A)));
rowSquareSums(REAL(A), nrows(A), ncols(A), REAL(result));
UNPROTECT(1);
return result;
}
void rowSumsSymVec(const double *Avec, const int nrow,
const double diag,
double *sum);
SEXP R_rowSumsSymVec(SEXP Avec, SEXP nrow, SEXP diag) {
SEXP sum = PROTECT(allocVector(REALSXP, *INTEGER(nrow)));
rowSumsSymVec(REAL(Avec), *INTEGER(nrow), *REAL(diag), REAL(sum));
UNPROTECT(1);
return sum;
}
void rowSweep(const double *A, const int nrow, const int ncol,
const char* op,
const double *v, // vector of length nrow
double *C);
SEXP R_rowSweep(SEXP A, SEXP v, SEXP op) {
SEXP C = PROTECT(allocMatrix(REALSXP, nrows(A), ncols(A)));
rowSweep(REAL(A), nrows(A), ncols(A),
CHAR(STRING_ELT(op, 0)),
REAL(v), REAL(C));
UNPROTECT(1);
return C;
}
void transpose(const double *A, const int nrow, const int ncol, double* T);
SEXP R_transpose(SEXP A) {
SEXP T = PROTECT(allocMatrix(REALSXP, ncols(A), nrows(A)));
transpose(REAL(A), nrows(A), ncols(A), REAL(T));
UNPROTECT(1); /* T */
return T;
}
void sympMV(const double* vecA, const int nrow, const double* x, double* y);
SEXP R_sympMV(SEXP vecA, SEXP x) {
SEXP y = PROTECT(allocVector(REALSXP, length(x)));
sympMV(REAL(vecA), length(x), REAL(x), REAL(y));
UNPROTECT(1); /* y */
return y;
}
void matrixprod(const double *A, const int nrowA, const int ncolA,
const double *B, const int nrowB, const int ncolB,
double *C);
SEXP R_matrixprod(SEXP A, SEXP B) {
SEXP C = PROTECT(allocMatrix(REALSXP, nrows(A), ncols(B)));
matrixprod(REAL(A), nrows(A), ncols(A),
REAL(B), nrows(B), ncols(B),
REAL(C));
UNPROTECT(1);
return C;
}
void crossprod(const double* A, const int nrowA, const int ncolA,
               const double* B, const int nrowB, const int ncolB,
double* C);
SEXP R_crossprod(SEXP A, SEXP B) {
SEXP C = PROTECT(allocMatrix(REALSXP, ncols(A), ncols(B)));
crossprod(REAL(A), nrows(A), ncols(A),
REAL(B), nrows(B), ncols(B),
REAL(C));
UNPROTECT(1);
return C;
}
void kronecker(const double *A, const int nrowA, const int ncolA,
const double *B, const int nrowB, const int ncolB,
const char *op,
double *C);
SEXP R_kronecker(SEXP A, SEXP B, SEXP op) {
SEXP C = PROTECT(allocMatrix(REALSXP,
nrows(A) * nrows(B),
ncols(A) * ncols(B)));
kronecker(REAL(A), nrows(A), ncols(A),
REAL(B), nrows(B), ncols(B),
CHAR(STRING_ELT(op, 0)),
REAL(C));
UNPROTECT(1);
return C;
}
void skewSymRank2k(const int n, const int k,
double alpha, const double *A, const double *B,
double beta,
double *C);
SEXP R_skewSymRank2k(SEXP A, SEXP B, SEXP alpha, SEXP beta) {
SEXP C = PROTECT(allocMatrix(REALSXP, nrows(A), nrows(A)));
memset(REAL(C), 0, nrows(A) * nrows(A) * sizeof(double));
skewSymRank2k(nrows(A), ncols(A),
*REAL(alpha), REAL(A), REAL(B),
*REAL(beta), REAL(C));
UNPROTECT(1);
return C;
}
void nullProj(const double* B, const int nrowB, const int ncolB,
double* Q);
SEXP R_nullProj(SEXP B) {
SEXP Q = PROTECT(allocMatrix(REALSXP, nrows(B), nrows(B)));
nullProj(REAL(B), nrows(B), ncols(B), REAL(Q));
UNPROTECT(1);
return Q;
}
void rangePairs(const int from, const int to, int *pairs);
SEXP R_rangePairs(SEXP from, SEXP to) {
int start = asInteger(from);
int end = asInteger(to) + 1;
int n = end - start;
SEXP out = PROTECT(allocMatrix(INTSXP, 2, n * (n - 1) / 2));
rangePairs(start, end, INTEGER(out));
UNPROTECT(1);
return out;
}
void grad(const int n, const int p, const int q,
const double *X,
const double *X_diff,
const double *Y,
const double *V,
const double h,
double *G, double *const loss);
SEXP R_grad(SEXP X, SEXP X_diff, SEXP Y, SEXP V, SEXP h) {
SEXP G = PROTECT(allocMatrix(REALSXP, nrows(V), ncols(V)));
SEXP loss = PROTECT(allocVector(REALSXP, 1));
grad(nrows(X), ncols(X), ncols(V),
REAL(X), REAL(X_diff), REAL(Y), REAL(V), *REAL(h),
REAL(G), REAL(loss));
UNPROTECT(2);
return G;
}
#endif /* CVE_INCLUDE_GUARD_ */