- update: doc
- wip: cleanup
- update: datasets
- remove: old code

parent 300fc11f3f
commit 9edefe994d

@@ -9,15 +9,10 @@ export(cve)
export(cve.call)
export(dataset)
export(directions)
export(elem.pairs)
export(estimate.bandwidth)
export(null)
export(predict_dim)
export(projTangentStiefel)
export(rStiefel)
export(retractStiefel)
export(skew)
export(sym)
import(stats)
importFrom(graphics,boxplot)
importFrom(graphics,lines)
@@ -20,7 +20,7 @@
#' zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
#' is an unknown, continuous non-constant function,
#' and \eqn{B = (b_1, ..., b_k)} is
#' a real \eqn{p \times k}{p x k} of rank \eqn{k <= p}{k \leq p}.
#' a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}.
#' Without loss of generality \eqn{B} is assumed to be orthonormal.
#'
#' @author Daniel Kapla, Lukas Fertl, Bura Efstathia
@@ -36,26 +36,46 @@
#' @inherit CVE-package description
#'
#' @param formula an object of class \code{"formula"} which is a symbolic
#'      description of the model to be fitted.
#'      description of the model to be fitted like \eqn{Y\sim X}{Y ~ X} where
#'      \eqn{Y} is a \eqn{n}-dimensional vector of the response variable and
#'      \eqn{X} is a \eqn{n\times p}{n x p} matrix of the predictors.
#' @param data an optional data frame, containing the data for the formula if
#'      supplied.
#' @param method specifies the CVE method variation as one of
#'      supplied like \code{data <- data.frame(Y, X)} with dimension
#'      \eqn{n \times (p + 1)}{n x (p + 1)}. By default the variables are taken from
#'      the environment from which \code{cve} is called.
#' @param method This character string specifies the method of fitting. The
#'      options are
#' \itemize{
#'      \item "simple" exact implementation as described in the paper listed
#'            below.
#'      \item "weighted" variation with addaptive weighting of slices.
#'      \item "simple" implementation as described in the paper.
#'      \item "weighted" variation with adaptive weighting of slices.
#' }
#' see paper.
#' @param max.dim upper bound for \code{k} (ignored if \code{k} is supplied).
#' @param ... Parameters passed on to \code{cve.call}.
#' @param ... optional parameters passed on to \code{cve.call}.
#'
#' @return an S3 object of class \code{cve} with components:
#' \describe{
#'      \item{X}{Original training data,}
#'      \item{Y}{Responce of original training data,}
#'      \item{X}{design matrix of predictor vector used for calculating
#'            cve-estimate,}
#'      \item{Y}{\eqn{n}-dimensional vector of responses used for calculating
#'            cve-estimate,}
#'      \item{method}{Name of used method,}
#'      \item{call}{the matched call,}
#'      \item{res}{list of components \code{V, L, B, loss, h} and \code{k} for
#'            each \eqn{k=min.dim,...,max.dim} (dimension).}
#'      \item{res}{list of components \code{V, L, B, loss, h} for
#'            each \code{k = min.dim, ..., max.dim}. If \code{k} was supplied in the
#'            call, \code{min.dim = max.dim = k}.
#'      \itemize{
#'            \item \code{B} is the cve-estimate with dimension
#'                  \eqn{p\times k}{p x k}.
#'            \item \code{V} is the orthogonal complement of \eqn{B}.
#'            \item \code{L} is the loss for each sample separately such that
#'                  its mean is \code{loss}.
#'            \item \code{loss} is the value of the target function that is
#'                  minimized, evaluated at \eqn{V}.
#'            \item \code{h} bandwidth parameter used to calculate
#'                  \code{B, V, loss, L}.
#'      }
#' }
#' }
#'
#' @examples
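To make the new `res` structure concrete, access would look like the following sketch (not part of the commit; the fit `dr` is hypothetical, and indexing `res` by the dimension's name is an assumption consistent with how `names(object$res)` is used in `predict_dim` further down):

    dr <- cve(Y ~ X, max.dim = 5)
    res.k <- dr$res[["2"]]                 # components for k = 2
    B.hat <- res.k$B                       # p x 2 cve-estimate of the basis
    all.equal(mean(res.k$L), res.k$loss)   # L are per-sample losses, loss their mean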
@@ -66,7 +86,7 @@
#' b1 <- rep(1 / sqrt(p), p)
#' b2 <- (-1)^seq(1, p) / sqrt(p)
#' B <- cbind(b1, b2)
#' # samplsize
#' # sample size
#' n <- 200
#' set.seed(21)
#' # creat predictor data x ~ N(0, I_p)
@@ -139,10 +159,12 @@ cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
#' @inherit cve title
#' @inherit cve description
#'
#' @param X Design matrix with dimension \eqn{n\times p}{n x p}.
#' @param Y numeric array of length \eqn{n} of responses.
#' @param h bandwidth or function to estimate bandwidth, defaults to internally
#'      estimated bandwidth.
#' @param nObs parameter for choosing bandwidth \code{h} using
#'      \code{\link{estimate.bandwidth}} (ignored if \code{h} is supplied).
#' @param X data matrix with samples in its rows.
#' @param Y Responses (1 dimensional).
#' @param method specifies the CVE method variation as one of
#' \itemize{
#'      \item "simple" exact implementation as described in the paper listed
@@ -156,19 +178,23 @@ cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
#' @param tau Initial step-size.
#' @param tol Tolerance for break condition.
#' @param max.iter maximum number of optimization steps.
#' @param attempts number of arbitrary different starting points.
#' @param logger a logger function (only for advanced user, significantly slows
#'      down the computation).
#' @param h bandwidth or function to estimate bandwidth, defaults to internaly
#'      estimated bandwidth.
#' @param momentum number of [0, 1) giving the ration of momentum for eucledian
#'      gradient update with a momentum term.
#' @param attempts If \code{V.init} not supplied, the optimization is carried
#'      out \code{attempts} times with starting values drawn from the invariant
#'      measure on the Stiefel manifold (see \code{\link{rStiefel}}).
#' @param momentum number in \eqn{[0, 1)} giving the ratio of momentum for the
#'      euclidean gradient update with a momentum term. \code{momentum = 0}
#'      corresponds to normal gradient descent.
#' @param slack Positive scaling to allow small increases of the loss while
#'      optimizing.
#' @param gamma step-size reduction multiple.
#'      optimizing, i.e. \code{slack = 0.1} allows the target function to
#'      increase up to \eqn{10 \%} in one optimization step.
#' @param gamma step-size reduction multiple. If a gradient step with step size
#'      \code{tau} is not accepted, \code{gamma * tau} is set to the next step
#'      size.
#' @param V.init Semi-orthogonal matrix of dimensions `(ncol(X), ncol(X) - k)`
#'      as optimization starting value. (If supplied, \code{attempts} is
#'      set to 1 and \code{k} to match dimension)
#'      used as starting value in the optimization. (If supplied,
#'      \code{attempts} is set to 0 and \code{k} to match dimension).
#' @param logger a logger function (only for advanced user, slows down the
#'      computation).
#'
#' @inherit cve return
#'
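Read together, `tau`, `slack` and `gamma` describe a backtracking step-size control. A minimal sketch of the implied acceptance rule (names like `loss.fun` are hypothetical; this is not the package internals):

    loss.new <- loss.fun(V.new)            # loss after a trial gradient step
    if (loss.new <= (1 + slack) * loss) {
        V <- V.new                         # accept, tolerating a small increase
        loss <- loss.new
    } else {
        tau <- gamma * tau                 # reject: shrink step size and retry
    }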
@@ -253,6 +279,7 @@ cve.call <- function(X, Y, method = "simple",
            stop("Dimension missmatch of 'V.init' and 'X'")
        }
        min.dim <- max.dim <- ncol(X) - ncol(V.init)
        storage.mode(V.init) <- "double"
        attempts <- 0L
    } else if (missing(k) || is.null(k)) {
        min.dim <- as.integer(min.dim)
@@ -320,6 +347,9 @@ cve.call <- function(X, Y, method = "simple",
        }
    }

    # Convert numerical values to "double".
    storage.mode(X) <- storage.mode(Y) <- "double"

    if (is.function(logger)) {
        loggerEnv <- environment(logger)
    } else {
@@ -1,8 +1,10 @@
#' Gets estimated SDR basis.
#'
#' Returns the SDR basis matrix for SDR dimension(s).
#' Returns the SDR basis matrix for dimension \code{k}, i.e. returns the
#' cve-estimate with dimension \eqn{p\times k}{p x k}.
#'
#' @param object instance of \code{cve} as output from \code{\link{cve}} or
#'      \code{\link{cve.call}}
#'      \code{\link{cve.call}}.
#' @param k the SDR dimension.
#' @param ... ignored.
#'
@@ -0,0 +1,279 @@
#' Multivariate Normal Distribution.
#'
#' Random generation for the multivariate normal distribution.
#' \deqn{X \sim N_p(\mu, \Sigma)}{X ~ N_p(\mu, \Sigma)}
#'
#' @param n number of samples.
#' @param mu mean
#' @param sigma covariance matrix.
#'
#' @return a \eqn{n\times p}{n x p} matrix with samples in its rows.
#'
#' @examples
#' \dontrun{
#' rmvnorm(20, sigma = matrix(c(2, 1, 1, 2), 2))
#' rmvnorm(20, mu = c(3, -1, 2))
#' }
#' @keywords internal
rmvnorm <- function(n = 1, mu = rep(0, p), sigma = diag(p)) {
    if (!missing(sigma)) {
        p <- nrow(sigma)
    } else if (!missing(mu)) {
        mu <- matrix(mu, ncol = 1)
        p <- nrow(mu)
    } else {
        stop("At least one of 'mu' or 'sigma' must be supplied.")
    }

    # See: https://en.wikipedia.org/wiki/Multivariate_normal_distribution
    return(rep(mu, each = n) + matrix(rnorm(n * p), n) %*% chol(sigma))
}
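A quick sanity check of the Cholesky construction above (a sketch, not part of the file): since `chol(sigma)` returns the upper-triangular `R` with `sigma = R'R`, the sample covariance should approach `sigma`.

    S <- matrix(c(2, 1, 1, 2), 2)
    X <- rmvnorm(5000, sigma = S)
    round(cov(X), 1)       # approximately S
    round(colMeans(X), 1)  # approximately c(0, 0)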
#' Multivariate t distribution.
#'
#' Random generation from the multivariate t distribution (student distribution).
#'
#' @param n number of samples.
#' @param mu mean
#' @param sigma a \eqn{k\times k}{k x k} positive definite matrix. If the degrees
#'      of freedom \eqn{\nu} are bigger than 2, the resulting covariance is
#'      \deqn{var(x) = \Sigma\frac{\nu}{\nu - 2}}
#'      for \eqn{\nu > 2}.
#' @param df degrees of freedom \eqn{\nu}.
#'
#' @return a \eqn{n\times p}{n x p} matrix with samples in its rows.
#'
#' @examples
#' \dontrun{
#' rmvt(20, c(0, 1), matrix(c(3, 1, 1, 2), 2), 3)
#' rmvt(20, sigma = matrix(c(2, 1, 1, 2), 2), df = 3)
#' rmvt(20, mu = c(3, -1, 2), df = 3)
#' }
#' @keywords internal
rmvt <- function(n = 1, mu = rep(0, p), sigma = diag(p), df = Inf) {
    if (!missing(sigma)) {
        p <- nrow(sigma)
    } else if (!missing(mu)) {
        mu <- matrix(mu, ncol = 1)
        p <- nrow(mu)
    } else {
        stop("At least one of 'mu' or 'sigma' must be supplied.")
    }

    if (df == Inf) {
        Z <- 1
    } else {
        Z <- sqrt(df / rchisq(n, df))
    }

    return(rmvnorm(n, sigma = sigma) * Z + rep(mu, each = n))
}
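Note the scaling this construction implies: multiplying by `Z = sqrt(df / rchisq(n, df))` inflates the covariance by `nu / (nu - 2)`. A hedged check (using `df = 6` so fourth moments are finite and the sample covariance is stable):

    X <- rmvt(5000, sigma = diag(2), df = 6)
    round(cov(X), 1)   # roughly 1.5 * diag(2), since nu / (nu - 2) = 1.5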
#' Generalized Normal Distribution.
#'
#' Random generation for the generalized Normal Distribution.
#'
#' @param n Number of generated samples.
#' @param mu mean.
#' @param alpha first shape parameter.
#' @param beta second shape parameter.
#'
#' @return numeric array of length \eqn{n}.
#'
#' @seealso https://en.wikipedia.org/wiki/Generalized_normal_distribution
#' @keywords internal
rgnorm <- function(n = 1, mu = 0, alpha = 1, beta = 1) {
    if (alpha <= 0 | beta <= 0) {
        stop("alpha and beta must be positive.")
    }
    lambda <- (1 / alpha)^beta
    scales <- qgamma(runif(n), shape = 1 / beta, scale = 1 / lambda)^(1 / beta)
    return(scales * ((-1)^rbinom(n, 1, 0.5)) + mu)
}
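The variance of this generalized normal is alpha^2 * gamma(3/beta) / gamma(1/beta) (see the linked article), which is exactly how `dataset()` below calibrates the M1 error term. A quick check, as a sketch:

    beta <- 0.5
    alpha <- sqrt(0.5 * gamma(1 / beta) / gamma(3 / beta))
    var(rgnorm(1e5, 0, alpha, beta))   # approximately 0.5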
#' Laplace distribution
#'
#' Random generation for the Laplace distribution.
#'
#' @param n Number of generated samples.
#' @param mu mean.
#' @param sd standard deviation.
#'
#' @return numeric array of length \eqn{n}.
#'
#' @seealso https://en.wikipedia.org/wiki/Laplace_distribution
#' @keywords internal
rlaplace <- function(n = 1, mu = 0, sd = 1) {
    U <- runif(n, -0.5, 0.5)
    scale <- sd / sqrt(2)

    return(mu - scale * sign(U) * log(1 - 2 * abs(U)))
}
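Here `sd` is the standard deviation, not the usual Laplace scale `b`; they relate by `b = sd / sqrt(2)`, hence the `sd / sqrt(2)` above. A one-line check (sketch):

    var(rlaplace(1e5, 0, sd = 2))   # approximately 4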
#' Generates test datasets.
#'
#' Provides sample datasets M1-M7 used in the paper Conditional variance
#' estimation for sufficient dimension reduction, Lukas Fertl, Efstathia Bura.
#' The general model is given by:
#' \deqn{Y = g(B'X) + \epsilon}
#'
#' @param name One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"},
#'      \code{"M5"}, \code{"M6"} or \code{"M7"}. Alternatively, just the
#'      dataset number 1-7.
#' @param n number of samples.
#' @param p Dimension of random variable \eqn{X}.
#' @param sd standard deviation for error term \eqn{\epsilon}.
#' @param ... Additional parameters only for "M2" (namely \code{pmix} and
#'      \code{lambda}), see below.
#'
#' @return List with elements
#' \itemize{
#'      \item{X}{data, a \eqn{n\times p}{n x p} matrix.}
#'      \item{Y}{response.}
#'      \item{B}{the dim-reduction matrix.}
#'      \item{name}{Name of the dataset (name parameter).}
#' }
#'
#' @section M1:
#' The predictors are distributed as
#' \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, \Sigma)} with
#' \eqn{\Sigma_{i, j} = 0.5^{|i - j|}}{\Sigma_ij = 0.5^|i - j|} for
#' \eqn{i, j = 1,..., p} for a subspace dimension of \eqn{k = 1} with a default
#' of \eqn{n = 100} data points. \eqn{p = 20},
#' \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)}, and \eqn{Y} is
#' given as \deqn{Y = cos(b_1'X) + \epsilon} where \eqn{\epsilon} is
#' distributed as a generalized normal distribution with location 0,
#' shape-parameter 0.5, and the scale-parameter is chosen such that
#' \eqn{Var(\epsilon) = 0.5}.
#' @section M2:
#' The predictors are distributed as \eqn{X \sim Z 1_p \lambda + N_p(0, I_p)}{X ~ Z 1_p \lambda + N_p(0, I_p)} with
#' \eqn{Z \sim 2 Binom(p_{mix}) - 1\in\{-1, 1\}}{Z ~ 2 Binom(pmix) - 1} where
#' \eqn{1_p} is the \eqn{p}-dimensional vector of ones, for a subspace
#' dimension of \eqn{k = 1} with a default of \eqn{n = 100} data points.
#' \eqn{p = 20}, \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
#' and \eqn{Y} is \deqn{Y = cos(b_1'X) + 0.5\epsilon} where \eqn{\epsilon} is
#' standard normal.
#' The default for \code{pmix} is 0.3 and \code{lambda} defaults to 1.
#' @section M3:
#' The predictors are distributed as \eqn{X\sim N_p(0, I_p)}{X~N_p(0, I_p)}
#' for a subspace
#' dimension of \eqn{k = 1} with a default of \eqn{n = 100} data points.
#' \eqn{p = 20}, \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
#' and \eqn{Y} is
#' \deqn{Y = 2 log(|b_1'X| + 2) + 0.5\epsilon} where \eqn{\epsilon} is
#' standard normal.
#' @section M4:
#' The predictors are distributed as \eqn{X\sim N_p(0,\Sigma)}{X~N_p(0,\Sigma)}
#' with \eqn{\Sigma_{i, j} = 0.5^{|i - j|}}{\Sigma_ij = 0.5^|i - j|} for
#' \eqn{i, j = 1,..., p} for a subspace dimension of \eqn{k = 2} with a default
#' of \eqn{n = 100} data points. \eqn{p = 20},
#' \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
#' \eqn{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / sqrt(6)}
#' and \eqn{Y} is given as \deqn{Y = \frac{b_1'X}{0.5 + (1.5 + b_2'X)^2} + 0.5\epsilon}{Y = (b_1'X) / (0.5 + (1.5 + b_2'X)^2) + 0.5\epsilon}
#' where \eqn{\epsilon} is standard normal.
#' @section M5:
#' The predictors are distributed as \eqn{X\sim U([0,1]^p)}{X~U([0, 1]^p)}
#' where \eqn{U([0, 1]^p)} is the uniform distribution with
#' independent components on the \eqn{p}-dimensional hypercube for a subspace
#' dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points.
#' \eqn{p = 20},
#' \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
#' \eqn{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / sqrt(6)}
#' and \eqn{Y} is given as \deqn{Y = cos(\pi b_1'X)(b_2'X + 1)^2 + 0.5\epsilon}
#' where \eqn{\epsilon} is standard normal.
#' @section M6:
#' The predictors are distributed as \eqn{X\sim N_p(0, I_p)}{X~N_p(0, I_p)}
#' for a subspace dimension of \eqn{k = 3} with a default of \eqn{n = 200} data
#' points. \eqn{p = 20, b_1 = e_1, b_2 = e_2}, and \eqn{b_3 = e_p}, where
#' \eqn{e_j} is the \eqn{j}-th unit vector in the \eqn{p}-dimensional space.
#' \eqn{Y} is given as \deqn{Y = (b_1'X)^2+(b_2'X)^2+(b_3'X)^2+0.5\epsilon}
#' where \eqn{\epsilon} is standard normal.
#' @section M7:
#' The predictors are distributed as \eqn{X\sim t_3(I_p)}{X~t_3(I_p)} where
#' \eqn{t_3(I_p)} is the standard multivariate t-distribution with 3 degrees of
#' freedom, for a subspace dimension of \eqn{k = 4} with a default of
#' \eqn{n = 200} data points.
#' \eqn{p = 20, b_1 = e_1, b_2 = e_2, b_3 = e_3}, and \eqn{b_4 = e_p}, where
#' \eqn{e_j} is the \eqn{j}-th unit vector in the \eqn{p}-dimensional space.
#' \eqn{Y} is given as \deqn{Y = (b_1'X)(b_2'X)^2+(b_3'X)(b_4'X)+0.5\epsilon}
#' where \eqn{\epsilon} is distributed as a generalized normal distribution with
#' location 0, shape-parameter 1, and the scale-parameter is chosen such that
#' \eqn{Var(\epsilon) = 0.25}.
#'
#' @references Fertl Lukas, Bura Efstathia. (2019), Conditional Variance
#'      Estimation for Sufficient Dimension Reduction. Working Paper.
#'
#' @import stats
#' @importFrom stats rnorm rbinom
#' @export
dataset <- function(name = "M1", n = NULL, p = 20, sd = 0.5, ...) {
    name <- toupper(name)
    if (nchar(name) == 1) { name <- paste0("M", name) }

    if (name == "M1") {
        if (missing(n)) { n <- 100 }
        # B ... `p x 1`
        B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1)
        X <- rmvnorm(n, sigma = 0.5^abs(outer(1:p, 1:p, FUN = `-`)))
        beta <- 0.5
        Y <- cos(X %*% B) + rgnorm(n, 0,
            alpha = sqrt(sd^2 * gamma(1 / beta) / gamma(3 / beta)),
            beta = beta
        )
    } else if (name == "M2") {
        if (missing(n)) { n <- 100 }
        params <- list(...)
        pmix <- if (is.null(params$pmix)) { 0.3 } else { params$pmix }
        lambda <- if (is.null(params$lambda)) { 1 } else { params$lambda }
        # B ... `p x 1`
        B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1)
        Z <- 2 * rbinom(n, 1, pmix) - 1
        X <- matrix(rep(lambda * Z, p) + rnorm(n * p), n)
        Y <- cos(X %*% B) + rnorm(n, 0, sd)
    } else if (name == "M3") {
        if (missing(n)) { n <- 100 }
        # B ... `p x 1`
        B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1)
        X <- matrix(rnorm(n * p), n)
        Y <- 2 * log(2 + abs(X %*% B)) + rnorm(n, 0, sd)
    } else if (name == "M4") {
        if (missing(n)) { n <- 200 }
        # B ... `p x 2`
        B <- cbind(
            c(rep(1 / sqrt(6), 6), rep(0, p - 6)),
            c(rep(c(1, -1), 3) / sqrt(6), rep(0, p - 6))
        )
        X <- rmvnorm(n, sigma = 0.5^abs(outer(1:p, 1:p, FUN = `-`)))
        XB <- X %*% B
        Y <- (XB[, 1]) / (0.5 + (XB[, 2] + 1.5)^2) + rnorm(n, 0, sd)
    } else if (name == "M5") {
        if (missing(n)) { n <- 200 }
        # B ... `p x 2`
        B <- cbind(
            c(rep(1, 6), rep(0, p - 6)),
            c(rep(c(1, -1), 3), rep(0, p - 6))
        ) / sqrt(6)
        X <- matrix(runif(n * p), n)
        XB <- X %*% B
        Y <- cos(XB[, 1] * pi) * (XB[, 2] + 1)^2 + rnorm(n, 0, sd)
    } else if (name == "M6") {
        if (missing(n)) { n <- 200 }
        # B ... `p x 3`
        B <- diag(p)[, -(3:(p - 1))]
        X <- matrix(rnorm(n * p), n)
        Y <- rowSums((X %*% B)^2) + rnorm(n, 0, sd)
    } else if (name == "M7") {
        if (missing(n)) { n <- 400 }
        # B ... `p x 4`
        B <- diag(p)[, -(4:(p - 1))]
        # "R"andom "M"ulti"V"ariate "S"tudent
        X <- rmvt(n = n, sigma = diag(p), df = 3)
        XB <- X %*% B
        Y <- (XB[, 1]) * (XB[, 2])^2 + (XB[, 3]) * (XB[, 4])
        Y <- Y + rlaplace(n, 0, sd)
    } else {
        stop("Got unknown dataset name.")
    }

    return(list(X = X, Y = Y, B = B, name = name))
}
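A minimal round trip through the new generator (a sketch, not part of the commit):

    ds <- dataset("M1")
    dim(ds$X)   # 100 x 20
    dim(ds$B)   # 20 x 1
    plot(ds$X %*% ds$B, ds$Y)   # Y = cos(b_1'X) + eps along the true direction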
@@ -5,9 +5,15 @@ directions <- function(dr, k) {

#' Computes projected training data \code{X} for given dimension `k`.
#'
#' @param dr Instance of 'cve' as returned by \code{cve}.
#' Projects the design matrix \eqn{X} onto the column space of the
#' cve-estimate for the given dimension \eqn{k}.
#'
#' @param dr Instance of \code{'cve'} as returned by \code{\link{cve}}.
#' @param k SDR dimension to use for projection.
#'
#' @return the \eqn{n\times k}{n x k} dimensional matrix \eqn{X B} where \eqn{B}
#'      is the cve-estimate for dimension \eqn{k}.
#'
#' @examples
#' # create B for simulation (k = 1)
#' B <- rep(1, 5) / sqrt(5)
@@ -1,27 +1,22 @@
#' Bandwidth estimation for CVE.
#'
#' Estimates a bandwidth \code{h} according
#' If no bandwidth or function for calculating it is supplied, the CVE method
#' defaults to using the following formula (version 1)
#' \deqn{%
#'   h = (2 * tr(\Sigma) / p) * (1.2 * n^{-1 / (4 + k)})^2}{%
#'   h = (2 * tr(\Sigma) / p) * (1.2 * n^(\frac{-1}{4 + k}))^2}
#' with \eqn{n} the sample size, \eqn{p} its dimension
#' (\code{n <- nrow(X); p <- ncol(X)}) and the covariance-matrix \eqn{\Sigma}
#' which is \code{(n-1)/n} times the sample covariance estimate.
#'   h = \frac{2 tr(\Sigma)}{p} (1.2 n^{\frac{-1}{4 + k}})^2}{%
#'   h = (2 * tr(\Sigma) / p) * (1.2 * n^(-1 / (4 + k)))^2}
#' The alternative version 2, used for dimension prediction, is given by
#' \deqn{%
#'   h = \frac{2 tr(\Sigma)}{p} \chi_k^{-1}(\frac{nObs - 1}{n - 1})}{%
#'   h = (2 * tr(\Sigma) / p) * \chi_k^-1((nObs - 1) / (n - 1))}
#' with \eqn{n} the sample size, \eqn{p} its dimension and the
#' covariance-matrix \eqn{\Sigma}, which is \code{(n-1)/n} times the sample
#' covariance estimate.
#'
#' @param X data matrix with samples in its rows.
#' @param X a \eqn{n\times p}{n x p} matrix with samples in its rows.
#' @param k Dimension of lower dimensional projection.
#' @param nObs number of points in a slice, see \eqn{nObs} in CVE paper.
#' @param version either \code{1} or \code{2}, where
#' \itemize{
#'      \item 1: uses the following formula:
#' \deqn{%
#'   h = (2 * tr(\Sigma) / p) * (1.2 * n^{-1 / (4 + k)})^2}{%
#'   h = (2 * tr(\Sigma) / p) * (1.2 * n^(\frac{-1}{4 + k}))^2}
#'      \item 2: uses
#' \deqn{%
#'   h = (2 * tr(\Sigma) / p) * \chi_k^-1((nObs - 1) / (n - 1))}{%
#'   h = (2 * tr(\Sigma) / p) * \chi_k^{-1}(\frac{nObs - 1}{n - 1})}
#' }
#' @param nObs number of points in a slice, only for version 2.
#' @param version either \code{1} or \code{2}.
#'
#' @return Estimated bandwidth \code{h}.
#'
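Both versions translate directly into R. A hedged sketch of the documented formulas (reading chi_k^{-1} as the quantile function of the chi-squared distribution with `k` degrees of freedom, which is an assumption; `est.bw` is a hypothetical name, not the package function):

    est.bw <- function(X, k, nObs, version = 1L) {
        n <- nrow(X)
        p <- ncol(X)
        Sigma <- ((n - 1) / n) * cov(X)   # (n-1)/n times the sample covariance
        if (version == 1L) {
            (2 * sum(diag(Sigma)) / p) * (1.2 * n^(-1 / (4 + k)))^2
        } else {
            (2 * sum(diag(Sigma)) / p) * qchisq((nObs - 1) / (n - 1), df = k)
        }
    }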
@@ -1,6 +1,9 @@
#' Loss distribution elbow plot.
#'
#' Boxplots of the loss from \code{min.dim} to \code{max.dim} \code{k} values.
#' Boxplots of the output \code{L} from \code{\link{cve}} over \code{k} from
#' \code{min.dim} to \code{max.dim}. For a given \code{k}, \code{L} corresponds
#' to \eqn{L_n(V, X_i)} where \eqn{V \in S(p, p - k)}{V} is the minimizer of
#' \eqn{L_n(V)}; for further details see the paper.
#'
#' @param x Object of class \code{"cve"} (result of [\code{\link{cve}}]).
#' @param ... Pass through parameters to [\code{\link{plot}}] and
@@ -31,6 +34,9 @@
#' # elbow plot
#' plot(cve.obj.simple)
#'
#' @references Fertl Lukas, Bura Efstathia. (2019), Conditional Variance
#'      Estimation for Sufficient Dimension Reduction. Working Paper.
#'
#' @seealso see \code{\link{par}} for graphical parameters to pass through
#'      as well as \code{\link{plot}}, the standard plot utility.
#' @method plot cve
@@ -1,6 +1,7 @@
#' Predict method for CVE Fits.
#'
#' Predict responces using reduced data with \code{\link{mars}}.
#' Predict response using projected data where the forward model \eqn{g(B'X)}
#' is estimated using \code{\link{mars}}.
#'
#' @param object instance of class \code{cve} (result of \code{cve},
#'      \code{cve.call}).
@@ -36,7 +37,7 @@
#'
#' # plot prediction against y.test
#' plot(yhat, y.test)
#' @seealso \code{\link{cve}}, \code{\link{cve.call}} or \pkg{\link{mars}}.
#' @seealso \code{\link{cve}}, \code{\link{cve.call}} and \pkg{\link{mars}}.
#'
#' @rdname predict.cve
#'
@@ -36,10 +36,6 @@ predict_dim_elbow <- function(object) {
    # Get dimensions
    n <- nrow(X)
    p <- ncol(X)
    # Compute persistent data.
    i = rep(1:n, n)
    j = rep(1:n, each = n)
    D.eucl = matrix((X[i, ] - X[j, ])^2 %*% rep(1, p), n)

    losses <- vector("double", length(object$res))
    names(losses) <- names(object$res)
@@ -48,16 +44,16 @@ predict_dim_elbow <- function(object) {
        # extract dimension specific estimates and dimensions.
        k <- dr.k$k
        V <- dr.k$V
        q <- ncol(V)
        # estimate bandwidth according alternative formula (see: TODO: see)
        # estimate bandwidth according to the alternative formula.
        h <- estimate.bandwidth(X, k, sqrt(n), version = 2L)
        # Projected `X`
        XV <- X %*% V
        # Devectorized distance matrix
        # (inefficient in R but fast in C)
        D <- matrix((XV[i, , drop = F] - XV[j, , drop = F])^2 %*% rep(1, q), n)
        D <- D.eucl - D
        XQ <- X %*% (diag(1, p) - tcrossprod(V)) # X (I - V V')
        # Compute distances
        d2 <- tcrossprod(XQ) # XQ XQ'
        d1 <- matrix(diag(d2), n, n)
        D <- d1 - 2 * d2 + t(d1)
        # Apply kernel
        # Note: CVE uses for d = ||Q(X_i - X_j)|| the kernel exp(-d^4 / (2 h^2))
        K <- exp((-0.5 / h^2) * D^2)
        # sum columns
        colSumsK <- colSums(K)
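The `d1 - 2 * d2 + t(d1)` step is the standard Gram-matrix identity `||a_i - a_j||^2 = ||a_i||^2 - 2 a_i'a_j + ||a_j||^2` applied to the rows of `XQ`. A tiny check against `dist` (sketch, not part of the commit):

    A <- matrix(rnorm(12), 4)
    G <- tcrossprod(A)
    d1 <- matrix(diag(G), 4, 4)
    max(abs((d1 - 2 * G + t(d1)) - as.matrix(dist(A))^2))   # effectively 0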
@@ -81,11 +77,7 @@ predict_dim_wilcoxon <- function(object, p.value = 0.05) {
    # Get dimensions
    n <- nrow(X)
    p <- ncol(X)
    # Compute persistent data.
    i = rep(1:n, n)
    j = rep(1:n, each = n)
    D.eucl = matrix((X[i, ] - X[j, ])^2 %*% rep(1, p), n)

    L <- matrix(NA, n, length(object$res))
    colnames(L) <- names(object$res)
    # Compute per sample losses with alternative bandwidth for each dimension.
@@ -93,16 +85,16 @@ predict_dim_wilcoxon(object, p.value = 0.05) {
        # extract dimension specific estimates and dimensions.
        k <- dr.k$k
        V <- dr.k$V
        q <- ncol(V)
        # estimate bandwidth according alternative formula (see: TODO: see)
        # estimate bandwidth according to the alternative formula.
        h <- estimate.bandwidth(X, k, sqrt(n), version = 2L)
        # Projected `X`
        XV <- X %*% V
        # Devectorized distance matrix
        # (inefficient in R but fast in C)
        D <- matrix((XV[i, , drop = F] - XV[j, , drop = F])^2 %*% rep(1, q), n)
        D <- D.eucl - D
        XQ <- X %*% (diag(1, p) - tcrossprod(V)) # X (I - V V')
        # Compute distances
        d2 <- tcrossprod(XQ) # XQ XQ'
        d1 <- matrix(diag(d2), n, n)
        D <- d1 - 2 * d2 + t(d1)
        # Apply kernel
        # Note: CVE uses for d = ||Q(X_i - X_j)|| the kernel exp(-d^4 / (2 h^2))
        K <- exp((-0.5 / h^2) * D^2)
        # sum columns
        colSumsK <- colSums(K)

@@ -130,27 +122,24 @@ predict_dim_wilcoxon <- function(object, p.value = 0.05) {
    ))
}

#' Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation.
#' TODO: rewrite!!!
#' \code{"TODO: @Lukas"}
#'
#' @param object instance of class \code{cve} (result of \code{cve},
#'      \code{cve.call}).
#' @param object instance of class \code{cve} (result of \code{\link{cve}},
#'      \code{\link{cve.call}}).
#' @param method one of \code{"CV"}, \code{"elbow"} or \code{"wilcoxon"}.
#' @param ... ignored.
#'
#' @return list with
#' \itemize{
#'      \item MSE: Mean Square Error,
#'      \item k: predicted dimensions.
#' }
#' @return list with \code{"k"} the predicted dimension and method dependent
#'      information.
#'
#' @section cv:
#' Cross-validation ... TODO:
#' @section Method cv:
#' TODO: \code{"TODO: @Lukas"}.
#'
#' @section elbow:
#' Cross-validation ... TODO:
#' @section Method elbow:
#' TODO: \code{"TODO: @Lukas"}.
#'
#' @section wilcoxon:
#' Cross-validation ... TODO:
#' @section Method wilcoxon:
#' TODO: \code{"TODO: @Lukas"}.
#'
#' @examples
#' # create B for simulation
@@ -1,5 +1,9 @@
#' Prints a summary of a \code{cve} result.
#' @param object Instance of 'cve' as returned by \code{cve}.
#'
#' Prints summary statistics of the output \code{L} from \code{cve} for
#' \code{k = min.dim, ..., max.dim}.
#'
#' @param object Instance of \code{"cve"} as returned by \code{\link{cve}}.
#' @param ... ignored.
#'
#' @examples
@@ -0,0 +1,24 @@
#' Draws a sample from the invariant measure on the Stiefel manifold
#' \eqn{S(p, q)}.
#'
#' @param p row dimension
#' @param q col dimension
#' @return \eqn{p \times q}{p x q} semi-orthogonal matrix.
#' @examples
#' V <- rStiefel(6, 4)
#' @export
rStiefel <- function(p, q) {
    return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))))
}

#' Null space basis of given matrix `V`
#'
#' @param V `(p, q)` matrix
#' @return Semi-orthogonal `(p, p - q)` matrix spanning the null space of
#'      `t(V)`, i.e. the orthogonal complement of the column space of `V`.
#' @keywords internal
#' @export
null <- function(V) {
    tmp <- qr(V)
    set <- if (tmp$rank == 0L) seq_len(ncol(V)) else -seq_len(tmp$rank)
    return(qr.Q(tmp, complete = TRUE)[, set, drop = FALSE])
}
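Both helpers can be verified against their defining properties (sketch, not part of the commit):

    V <- rStiefel(6, 4)
    max(abs(crossprod(V) - diag(4)))   # ~ 0: V is semi-orthogonal
    W <- null(V)                       # 6 x 2 basis of the orthogonal complement
    max(abs(crossprod(V, W)))          # ~ 0: columns of W are orthogonal to V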
@@ -26,7 +26,7 @@ variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
is an unknown, continuous non-constant function,
and \eqn{B = (b_1, ..., b_k)} is
a real \eqn{p \times k}{p x k} of rank \eqn{k <= p}{k \leq p}.
a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}.
Without loss of generality \eqn{B} is assumed to be orthonormal.
}
\references{
@@ -8,7 +8,7 @@
}
\arguments{
\item{object}{instance of \code{cve} as output from \code{\link{cve}} or
\code{\link{cve.call}}}
\code{\link{cve.call}}.}

\item{k}{the SDR dimension.}

@@ -18,7 +18,8 @@
dir the matrix of CS or CMS of given dimension
}
\description{
Returns the SDR basis matrix for SDR dimension(s).
Returns the SDR basis matrix for dimension \code{k}, i.e. returns the
cve-estimate with dimension \eqn{p\times k}{p x k}.
}
\examples{
# set dimensions for simulation model
@@ -8,31 +8,51 @@ cve(formula, data, method = "simple", max.dim = 10L, ...)
}
\arguments{
\item{formula}{an object of class \code{"formula"} which is a symbolic
description of the model to be fitted.}
description of the model to be fitted like \eqn{Y\sim X}{Y ~ X} where
\eqn{Y} is a \eqn{n}-dimensional vector of the response variable and
\eqn{X} is a \eqn{n\times p}{n x p} matrix of the predictors.}

\item{data}{an optional data frame, containing the data for the formula if
supplied.}
supplied like \code{data <- data.frame(Y, X)} with dimension
\eqn{n \times (p + 1)}{n x (p + 1)}. By default the variables are taken from
the environment from which \code{cve} is called.}

\item{method}{specifies the CVE method variation as one of
\item{method}{This character string specifies the method of fitting. The
options are
\itemize{
\item "simple" exact implementation as described in the paper listed
below.
\item "weighted" variation with addaptive weighting of slices.
}}
\item "simple" implementation as described in the paper.
\item "weighted" variation with adaptive weighting of slices.
}
see paper.}

\item{max.dim}{upper bound for \code{k} (ignored if \code{k} is supplied).}

\item{...}{Parameters passed on to \code{cve.call}.}
\item{...}{optional parameters passed on to \code{cve.call}.}
}
\value{
an S3 object of class \code{cve} with components:
\describe{
\item{X}{Original training data,}
\item{Y}{Responce of original training data,}
\item{X}{design matrix of predictor vector used for calculating
cve-estimate,}
\item{Y}{\eqn{n}-dimensional vector of responses used for calculating
cve-estimate,}
\item{method}{Name of used method,}
\item{call}{the matched call,}
\item{res}{list of components \code{V, L, B, loss, h} and \code{k} for
each \eqn{k=min.dim,...,max.dim} (dimension).}
\item{res}{list of components \code{V, L, B, loss, h} for
each \code{k = min.dim, ..., max.dim}. If \code{k} was supplied in the
call, \code{min.dim = max.dim = k}.
\itemize{
\item \code{B} is the cve-estimate with dimension
\eqn{p\times k}{p x k}.
\item \code{V} is the orthogonal complement of \eqn{B}.
\item \code{L} is the loss for each sample separately such that
its mean is \code{loss}.
\item \code{loss} is the value of the target function that is
minimized, evaluated at \eqn{V}.
\item \code{h} bandwidth parameter used to calculate
\code{B, V, loss, L}.
}
}
}
}
\description{

@@ -56,7 +76,7 @@ variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
is an unknown, continuous non-constant function,
and \eqn{B = (b_1, ..., b_k)} is
a real \eqn{p \times k}{p x k} of rank \eqn{k <= p}{k \leq p}.
a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}.
Without loss of generality \eqn{B} is assumed to be orthonormal.
}
\examples{

@@ -67,7 +87,7 @@ k <- 2
b1 <- rep(1 / sqrt(p), p)
b2 <- (-1)^seq(1, p) / sqrt(p)
B <- cbind(b1, b2)
# samplsize
# sample size
n <- 200
set.seed(21)
# creat predictor data x ~ N(0, I_p)
@@ -10,9 +10,9 @@ cve.call(X, Y, method = "simple", nObs = sqrt(nrow(X)), h = NULL,
max.iter = 50L, attempts = 10L, logger = NULL)
}
\arguments{
\item{X}{data matrix with samples in its rows.}
\item{X}{Design matrix with dimension \eqn{n\times p}{n x p}.}

\item{Y}{Responses (1 dimensional).}
\item{Y}{numeric array of length \eqn{n} of responses.}

\item{method}{specifies the CVE method variation as one of
\itemize{
@@ -34,38 +34,59 @@ estimated bandwidth.}
\item{k}{Dimension of lower dimensional projection, if \code{k} is given
only the specified dimension \code{B} matrix is estimated.}

\item{momentum}{number of [0, 1) giving the ration of momentum for eucledian
gradient update with a momentum term.}
\item{momentum}{number in \eqn{[0, 1)} giving the ratio of momentum for the
euclidean gradient update with a momentum term. \code{momentum = 0}
corresponds to normal gradient descent.}

\item{tau}{Initial step-size.}

\item{tol}{Tolerance for break condition.}

\item{slack}{Positive scaling to allow small increases of the loss while
optimizing.}
optimizing, i.e. \code{slack = 0.1} allows the target function to
increase up to \eqn{10 \%} in one optimization step.}

\item{gamma}{step-size reduction multiple.}
\item{gamma}{step-size reduction multiple. If a gradient step with step size
\code{tau} is not accepted, \code{gamma * tau} is set to the next step
size.}

\item{V.init}{Semi-orthogonal matrix of dimensions `(ncol(X), ncol(X) - k)`
as optimization starting value. (If supplied, \code{attempts} is
set to 1 and \code{k} to match dimension)
used as starting value in the optimization. (If supplied,
\code{attempts} is set to 0 and \code{k} to match dimension).}

\item{max.iter}{maximum number of optimization steps.}

\item{attempts}{number of arbitrary different starting points.}
\item{attempts}{If \code{V.init} not supplied, the optimization is carried
out \code{attempts} times with starting values drawn from the invariant
measure on the Stiefel manifold (see \code{\link{rStiefel}}).}

\item{logger}{a logger function (only for advanced user, significantly slows
down the computation).}
\item{logger}{a logger function (only for advanced user, slows down the
computation).}
}
\value{
an S3 object of class \code{cve} with components:
\describe{
\item{X}{Original training data,}
\item{Y}{Responce of original training data,}
\item{X}{design matrix of predictor vector used for calculating
cve-estimate,}
\item{Y}{\eqn{n}-dimensional vector of responses used for calculating
cve-estimate,}
\item{method}{Name of used method,}
\item{call}{the matched call,}
\item{res}{list of components \code{V, L, B, loss, h} and \code{k} for
each \eqn{k=min.dim,...,max.dim} (dimension).}
\item{res}{list of components \code{V, L, B, loss, h} for
each \code{k = min.dim, ..., max.dim}. If \code{k} was supplied in the
call, \code{min.dim = max.dim = k}.
\itemize{
\item \code{B} is the cve-estimate with dimension
\eqn{p\times k}{p x k}.
\item \code{V} is the orthogonal complement of \eqn{B}.
\item \code{L} is the loss for each sample separately such that
its mean is \code{loss}.
\item \code{loss} is the value of the target function that is
minimized, evaluated at \eqn{V}.
\item \code{h} bandwidth parameter used to calculate
\code{B, V, loss, L}.
}
}
}
}
\description{

@@ -89,7 +110,7 @@ variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
is an unknown, continuous non-constant function,
and \eqn{B = (b_1, ..., b_k)} is
a real \eqn{p \times k}{p x k} of rank \eqn{k <= p}{k \leq p}.
a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}.
Without loss of generality \eqn{B} is assumed to be orthonormal.
}
\examples{
@@ -0,0 +1,127 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/datasets.R
\name{dataset}
\alias{dataset}
\title{Generates test datasets.}
\usage{
dataset(name = "M1", n = NULL, p = 20, sd = 0.5, ...)
}
\arguments{
\item{name}{One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"},
\code{"M5"}, \code{"M6"} or \code{"M7"}. Alternatively, just the dataset
number 1-7.}

\item{n}{number of samples.}

\item{p}{Dimension of random variable \eqn{X}.}

\item{sd}{standard deviation for error term \eqn{\epsilon}.}

\item{...}{Additional parameters only for "M2" (namely \code{pmix} and
\code{lambda}), see below.}
}
\value{
List with elements
\itemize{
\item{X}{data, a \eqn{n\times p}{n x p} matrix.}
\item{Y}{response.}
\item{B}{the dim-reduction matrix.}
\item{name}{Name of the dataset (name parameter).}
}
}
\description{
Provides sample datasets M1-M7 used in the paper Conditional variance
estimation for sufficient dimension reduction, Lukas Fertl, Efstathia Bura.
The general model is given by:
\deqn{Y = g(B'X) + \epsilon}
}
\section{M1}{

The predictors are distributed as
\eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, \Sigma)} with
\eqn{\Sigma_{i, j} = 0.5^{|i - j|}}{\Sigma_ij = 0.5^|i - j|} for
\eqn{i, j = 1,..., p} for a subspace dimension of \eqn{k = 1} with a default
of \eqn{n = 100} data points. \eqn{p = 20},
\eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)}, and \eqn{Y} is
given as \deqn{Y = cos(b_1'X) + \epsilon} where \eqn{\epsilon} is
distributed as a generalized normal distribution with location 0,
shape-parameter 0.5, and the scale-parameter is chosen such that
\eqn{Var(\epsilon) = 0.5}.
}

\section{M2}{

The predictors are distributed as \eqn{X \sim Z 1_p \lambda + N_p(0, I_p)}{X ~ Z 1_p \lambda + N_p(0, I_p)} with
\eqn{Z \sim 2 Binom(p_{mix}) - 1\in\{-1, 1\}}{Z ~ 2 Binom(pmix) - 1} where
\eqn{1_p} is the \eqn{p}-dimensional vector of ones, for a subspace
dimension of \eqn{k = 1} with a default of \eqn{n = 100} data points.
\eqn{p = 20}, \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
and \eqn{Y} is \deqn{Y = cos(b_1'X) + 0.5\epsilon} where \eqn{\epsilon} is
standard normal.
The default for \code{pmix} is 0.3 and \code{lambda} defaults to 1.
}

\section{M3}{

The predictors are distributed as \eqn{X\sim N_p(0, I_p)}{X~N_p(0, I_p)}
for a subspace
dimension of \eqn{k = 1} with a default of \eqn{n = 100} data points.
\eqn{p = 20}, \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
and \eqn{Y} is
\deqn{Y = 2 log(|b_1'X| + 2) + 0.5\epsilon} where \eqn{\epsilon} is
standard normal.
}

\section{M4}{

The predictors are distributed as \eqn{X\sim N_p(0,\Sigma)}{X~N_p(0,\Sigma)}
with \eqn{\Sigma_{i, j} = 0.5^{|i - j|}}{\Sigma_ij = 0.5^|i - j|} for
\eqn{i, j = 1,..., p} for a subspace dimension of \eqn{k = 2} with a default
of \eqn{n = 100} data points. \eqn{p = 20},
\eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
\eqn{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / sqrt(6)}
and \eqn{Y} is given as \deqn{Y = \frac{b_1'X}{0.5 + (1.5 + b_2'X)^2} + 0.5\epsilon}{Y = (b_1'X) / (0.5 + (1.5 + b_2'X)^2) + 0.5\epsilon}
where \eqn{\epsilon} is standard normal.
}

\section{M5}{

The predictors are distributed as \eqn{X\sim U([0,1]^p)}{X~U([0, 1]^p)}
where \eqn{U([0, 1]^p)} is the uniform distribution with
independent components on the \eqn{p}-dimensional hypercube for a subspace
dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points.
\eqn{p = 20},
\eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
\eqn{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / sqrt(6)}
and \eqn{Y} is given as \deqn{Y = cos(\pi b_1'X)(b_2'X + 1)^2 + 0.5\epsilon}
where \eqn{\epsilon} is standard normal.
}

\section{M6}{

The predictors are distributed as \eqn{X\sim N_p(0, I_p)}{X~N_p(0, I_p)}
for a subspace dimension of \eqn{k = 3} with a default of \eqn{n = 200} data
points. \eqn{p = 20, b_1 = e_1, b_2 = e_2}, and \eqn{b_3 = e_p}, where
\eqn{e_j} is the \eqn{j}-th unit vector in the \eqn{p}-dimensional space.
\eqn{Y} is given as \deqn{Y = (b_1'X)^2+(b_2'X)^2+(b_3'X)^2+0.5\epsilon}
where \eqn{\epsilon} is standard normal.
}

\section{M7}{

The predictors are distributed as \eqn{X\sim t_3(I_p)}{X~t_3(I_p)} where
\eqn{t_3(I_p)} is the standard multivariate t-distribution with 3 degrees of
freedom, for a subspace dimension of \eqn{k = 4} with a default of
\eqn{n = 200} data points.
\eqn{p = 20, b_1 = e_1, b_2 = e_2, b_3 = e_3}, and \eqn{b_4 = e_p}, where
\eqn{e_j} is the \eqn{j}-th unit vector in the \eqn{p}-dimensional space.
\eqn{Y} is given as \deqn{Y = (b_1'X)(b_2'X)^2+(b_3'X)(b_4'X)+0.5\epsilon}
where \eqn{\epsilon} is distributed as a generalized normal distribution with
location 0, shape-parameter 1, and the scale-parameter is chosen such that
\eqn{Var(\epsilon) = 0.25}.
}

\references{
Fertl Lukas, Bura Efstathia. (2019), Conditional Variance
Estimation for Sufficient Dimension Reduction. Working Paper.
}

@@ -8,12 +8,17 @@
\method{directions}{cve}(dr, k)
}
\arguments{
\item{dr}{Instance of 'cve' as returned by \code{cve}.}
\item{dr}{Instance of \code{'cve'} as returned by \code{\link{cve}}.}

\item{k}{SDR dimension to use for projection.}
}
\value{
the \eqn{n\times k}{n x k} dimensional matrix \eqn{X B} where \eqn{B}
is the cve-estimate for dimension \eqn{k}.
}
\description{
Computes projected training data \code{X} for given dimension `k`.
Projects the design matrix \eqn{X} onto the column space of the
cve-estimate for the given dimension \eqn{k}.
}
\examples{
# create B for simulation (k = 1)

@@ -4,26 +4,33 @@
\alias{estimate.bandwidth}
\title{Bandwidth estimation for CVE.}
\usage{
estimate.bandwidth(X, k, nObs)
estimate.bandwidth(X, k, nObs, version = 1L)
}
\arguments{
\item{X}{data matrix with samples in its rows.}
\item{X}{a \eqn{n\times p}{n x p} matrix with samples in its rows.}

\item{k}{Dimension of lower dimensional projection.}

\item{nObs}{number of points in a slice, see \eqn{nObs} in CVE paper.}
\item{nObs}{number of points in a slice, only for version 2.}

\item{version}{either \code{1} or \code{2}.}
}
\value{
Estimated bandwidth \code{h}.
}
\description{
Estimates a bandwidth \code{h} according
If no bandwidth or function for calculating it is supplied, the CVE method
defaults to using the following formula (version 1)
\deqn{%
h = (2 * tr(\Sigma) / p) * (1.2 * n^{-1 / (4 + k)})^2}{%
h = (2 * tr(\Sigma) / p) * (1.2 * n^(\frac{-1}{4 + k}))^2}
with \eqn{n} the sample size, \eqn{p} its dimension
(\code{n <- nrow(X); p <- ncol(X)}) and the covariance-matrix \eqn{\Sigma}
which is \code{(n-1)/n} times the sample covariance estimate.
h = \frac{2 tr(\Sigma)}{p} (1.2 n^{\frac{-1}{4 + k}})^2}{%
h = (2 * tr(\Sigma) / p) * (1.2 * n^(-1 / (4 + k)))^2}
The alternative version 2, used for dimension prediction, is given by
\deqn{%
h = \frac{2 tr(\Sigma)}{p} \chi_k^{-1}(\frac{nObs - 1}{n - 1})}{%
h = (2 * tr(\Sigma) / p) * \chi_k^-1((nObs - 1) / (n - 1))}
with \eqn{n} the sample size, \eqn{p} its dimension and the
covariance-matrix \eqn{\Sigma}, which is \code{(n-1)/n} times the sample
covariance estimate.
}
\examples{
# set dimensions for simulation model

@@ -13,7 +13,10 @@
[\code{\link{lines}}]}
}
\description{
Boxplots of the loss from \code{min.dim} to \code{max.dim} \code{k} values.
Boxplots of the output \code{L} from \code{\link{cve}} over \code{k} from
\code{min.dim} to \code{max.dim}. For a given \code{k}, \code{L} corresponds
to \eqn{L_n(V, X_i)} where \eqn{V \in S(p, p - k)}{V} is the minimizer of
\eqn{L_n(V)}; for further details see the paper.
}
\examples{
# create B for simulation

@@ -41,6 +44,10 @@ cve.obj.simple <- cve(Y ~ X, h = estimate.bandwidth, nObs = sqrt(nrow(X)))
# elbow plot
plot(cve.obj.simple)

}
\references{
Fertl Lukas, Bura Efstathia. (2019), Conditional Variance
Estimation for Sufficient Dimension Reduction. Working Paper.
}
\seealso{
see \code{\link{par}} for graphical parameters to pass through
@@ -20,7 +20,8 @@
predicted response of data \code{newdata}.
}
\description{
Predict responces using reduced data with \code{\link{mars}}.
Predict response using projected data where the forward model \eqn{g(B'X)}
is estimated using \code{\link{mars}}.
}
\examples{
# create B for simulation

@@ -50,5 +51,5 @@ yhat <- predict(cve.obj.simple, x.test, 1)
plot(yhat, y.test)
}
\seealso{
\code{\link{cve}}, \code{\link{cve.call}} or \pkg{\link{mars}}.
\code{\link{cve}}, \code{\link{cve.call}} and \pkg{\link{mars}}.
}
@@ -2,26 +2,40 @@
% Please edit documentation in R/predict_dim.R
\name{predict_dim}
\alias{predict_dim}
\title{Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation.}
\title{\code{"TODO: @Lukas"}}
\usage{
predict_dim(object, ...)
predict_dim(object, ..., method = "CV")
}
\arguments{
\item{object}{instance of class \code{cve} (result of \code{cve},
\code{cve.call}).}
\item{object}{instance of class \code{cve} (result of \code{\link{cve}},
\code{\link{cve.call}}).}

\item{...}{ignored.}

\item{method}{one of \code{"CV"}, \code{"elbow"} or \code{"wilcoxon"}.}
}
\value{
list with
\itemize{
\item MSE: Mean Square Error,
\item k: predicted dimensions.
}
list with \code{"k"} the predicted dimension and method dependent
information.
}
\description{
Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation.
\code{"TODO: @Lukas"}
}
\section{Method cv}{

TODO: \code{"TODO: @Lukas"}.
}

\section{Method elbow}{

TODO: \code{"TODO: @Lukas"}.
}

\section{Method wilcoxon}{

TODO: \code{"TODO: @Lukas"}.
}

\examples{
# create B for simulation
B <- rep(1, 5) / sqrt(5)
@@ -2,7 +2,8 @@
% Please edit documentation in R/util.R
\name{rStiefel}
\alias{rStiefel}
\title{Draws a sample from the invariant measure on the Stiefel manifold \eqn{S(p, q)}.}
\title{Draws a sample from the invariant measure on the Stiefel manifold
\eqn{S(p, q)}.}
\usage{
rStiefel(p, q)
}

@@ -12,10 +13,11 @@ rStiefel(p, q)
\item{q}{col dimension}
}
\value{
\code{p} times \code{q} semi-orthogonal matrix.
\eqn{p \times q}{p x q} semi-orthogonal matrix.
}
\description{
Draws a sample from the invariant measure on the Stiefel manifold \eqn{S(p, q)}.
Draws a sample from the invariant measure on the Stiefel manifold
\eqn{S(p, q)}.
}
\examples{
V <- rStiefel(6, 4)
@@ -0,0 +1,27 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/datasets.R
\name{rgnorm}
\alias{rgnorm}
\title{Generalized Normal Distribution.}
\usage{
rgnorm(n = 1, mu = 0, alpha = 1, beta = 1)
}
\arguments{
\item{n}{Number of generated samples.}

\item{mu}{mean.}

\item{alpha}{first shape parameter.}

\item{beta}{second shape parameter.}
}
\value{
numeric array of length \eqn{n}.
}
\description{
Random generation for the generalized Normal Distribution.
}
\seealso{
https://en.wikipedia.org/wiki/Generalized_normal_distribution
}
\keyword{internal}
@@ -0,0 +1,25 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/datasets.R
\name{rlaplace}
\alias{rlaplace}
\title{Laplace distribution}
\usage{
rlaplace(n = 1, mu = 0, sd = 1)
}
\arguments{
\item{n}{Number of generated samples.}

\item{mu}{mean.}

\item{sd}{standard deviation.}
}
\value{
numeric array of length \eqn{n}.
}
\description{
Random generation for the Laplace distribution.
}
\seealso{
https://en.wikipedia.org/wiki/Laplace_distribution
}
\keyword{internal}
@@ -0,0 +1,29 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/datasets.R
\name{rmvnorm}
\alias{rmvnorm}
\title{Multivariate Normal Distribution.}
\usage{
rmvnorm(n = 1, mu = rep(0, p), sigma = diag(p))
}
\arguments{
\item{n}{number of samples.}

\item{mu}{mean}

\item{sigma}{covariance matrix.}
}
\value{
a \eqn{n\times p}{n x p} matrix with samples in its rows.
}
\description{
Random generation for the multivariate normal distribution.
\deqn{X \sim N_p(\mu, \Sigma)}{X ~ N_p(\mu, \Sigma)}
}
\examples{
\dontrun{
rmvnorm(20, sigma = matrix(c(2, 1, 1, 2), 2))
rmvnorm(20, mu = c(3, -1, 2))
}
}
\keyword{internal}
@@ -0,0 +1,34 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/datasets.R
\name{rmvt}
\alias{rmvt}
\title{Multivariate t distribution.}
\usage{
rmvt(n = 1, mu = rep(0, p), sigma = diag(p), df = Inf)
}
\arguments{
\item{n}{number of samples.}

\item{mu}{mean}

\item{sigma}{a \eqn{k\times k}{k x k} positive definite matrix. If the degrees
of freedom \eqn{\nu} are bigger than 2, the resulting covariance is
\deqn{var(x) = \Sigma\frac{\nu}{\nu - 2}}
for \eqn{\nu > 2}.}

\item{df}{degrees of freedom \eqn{\nu}.}
}
\value{
a \eqn{n\times p}{n x p} matrix with samples in its rows.
}
\description{
Random generation from the multivariate t distribution (student distribution).
}
\examples{
\dontrun{
rmvt(20, c(0, 1), matrix(c(3, 1, 1, 2), 2), 3)
rmvt(20, sigma = matrix(c(2, 1, 1, 2), 2), df = 3)
rmvt(20, mu = c(3, -1, 2), df = 3)
}
}
\keyword{internal}
@@ -7,12 +7,13 @@
\method{summary}{cve}(object, ...)
}
\arguments{
\item{object}{Instance of 'cve' as returned by \code{cve}.}
\item{object}{Instance of \code{"cve"} as returned by \code{\link{cve}}.}

\item{...}{ignored.}
}
\description{
Prints a summary of a \code{cve} result.
Prints summary statistics of the output \code{L} from \code{cve} for
\code{k = min.dim, ..., max.dim}.
}
\examples{
# create B for simulation
@@ -161,97 +161,3 @@ mat* adjacence(const mat *vec_L, const mat *vec_Y, const mat *vec_y1,

    return mat_S;
}

// int getWorkLen(const int n, const int p, const int q) {
//     int mpq; /**< Max of p and q */
//     int nn = ((n - 1) * n) / 2;

//     if (p > q) {
//         mpq = p;
//     } else {
//         mpq = q;
//     }
//     if (nn * p < (mpq + 1) * mpq) {
//         return 2 * (mpq + 1) * mpq;
//     } else {
//         return (nn + mpq) * mpq;
//     }
// }

// double cost(const unsigned int method,
//             const int n,
//             const double *Y,
//             const double *vecK,
//             const double *colSums,
//             double *y1, double *L) {
//     int i, j, k;
//     double tmp, sum;

//     for (i = 0; i < n; ++i) {
//         y1[i] = Y[i];
//         L[i] = Y[i] * Y[i];
//     }

//     for (k = j = 0; j < n; ++j) {
//         for (i = j + 1; i < n; ++i, ++k) {
//             y1[i] += Y[j] * vecK[k];
//             y1[j] += Y[i] * vecK[k];
//             L[i] += Y[j] * Y[j] * vecK[k];
//             L[j] += Y[i] * Y[i] * vecK[k];
//         }
//     }

//     for (i = 0; i < n; ++i) {
//         y1[i] /= colSums[i];
//         L[i] /= colSums[i];
//     }

//     tmp = 0.0;
//     if (method == CVE_METHOD_WEIGHTED) {
//         sum = 0.0;
//         for (i = 0; i < n; ++i) {
//             tmp += (colSums[i] - 1.0) * (L[i] -= y1[i] * y1[i]);
//             sum += colSums[i];
//         }
//         return tmp / (sum - (double)n); // TODO: check for division by zero!
//     } else {
//         for (i = 0; i < n; ++i) {
//             tmp += (L[i] -= y1[i] * y1[i]);
//         }
//         return tmp / (double)n;
//     }
// }

// void scaling(const unsigned int method,
//             const int n,
//             const double *Y, const double *y1, const double *L,
//             const double *vecD, const double *vecK,
//             const double *colSums,
//             double *vecS) {
//     int i, j, k, nn = (n * (n - 1)) / 2;
//     double tmp;

//     if (method == CVE_METHOD_WEIGHTED) {
//         for (k = j = 0; j < n; ++j) {
//             for (i = j + 1; i < n; ++i, ++k) {
//                 tmp = Y[j] - y1[i];
//                 vecS[k] = (L[i] - (tmp * tmp));
//                 tmp = Y[i] - y1[j];
//                 vecS[k] += (L[j] - (tmp * tmp));
//             }
//         }
//     } else {
//         for (k = j = 0; j < n; ++j) {
//             for (i = j + 1; i < n; ++i, ++k) {
//                 tmp = Y[j] - y1[i];
//                 vecS[k] = (L[i] - (tmp * tmp)) / colSums[i];
//                 tmp = Y[i] - y1[j];
//                 vecS[k] += (L[j] - (tmp * tmp)) / colSums[j];
//             }
//         }
//     }

//     for (k = 0; k < nn; ++k) {
//         vecS[k] *= vecK[k] * vecD[k];
//     }
// }
|
|
@ -17,8 +17,8 @@ static const R_CallMethodDef CallEntries[] = {
|
|||
{NULL, NULL, 0}
|
||||
};
|
||||
|
||||
/* Restrict C entrypoints to registered routines. */
|
||||
void R_initCVE(DllInfo *dll) {
|
||||
/* Restrict C entry points to registered routines. */
|
||||
void R_init_CVE(DllInfo *dll) {
|
||||
R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
|
||||
R_useDynamicSymbols(dll, FALSE);
|
||||
}
|
|
@ -914,7 +914,7 @@ mat* laplace(mat *A, double *workMem) {
|
|||
* \_____/ \_____/
|
||||
* IpA C = ImA B
|
||||
* \_______/
|
||||
* IpA C = Y ==> C = IpA^-1 Y
|
||||
* IpA C = Y ==> C = IpA^-1 Y
|
||||
*
|
||||
* @param A Skew-Symmetric matrix of dimension `(n, n)`.
|
||||
* @param B Matrix of dimensions `(n, m)` with `m <= n`.
|
|
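The linear system sketched in the comment above is the same Cayley step used by the pure-R optimizers further down in this diff. A minimal R sketch of one such step, under the assumption that `A` is built from a placeholder gradient `G` at a Stiefel point `V`:

    # Sketch of one Cayley step: solve (I + A.tau) C = (I - A.tau) V for C.
    p <- 6; q <- 2
    V <- qr.Q(qr(matrix(rnorm(p * q), p, q)))  # point on the Stiefel manifold
    G <- matrix(rnorm(p * q), p, q)            # placeholder gradient at V
    A <- G %*% t(V) - V %*% t(G)               # skew-symmetric
    A.tau <- 0.25 * A                          # tau / 2 with tau = 0.5
    V.tau <- solve(diag(p) + A.tau, (diag(p) - A.tau) %*% V)
    round(crossprod(V.tau), 12)                # approx. identity: still semi-orthogonal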
@ -1,193 +0,0 @@
|
|||
#'
|
||||
#' @param n number of samples.
|
||||
#' @param mu mean vector.
|
||||
#' @param sigma covariance matrix.
|
||||
#'
|
||||
#' @returns a \eqn{n\times p} matrix with samples in its rows.
|
||||
#'
|
||||
#' @examples
|
||||
#' rmvnorm(20, sigma = matrix(c(2, 1, 1, 2), 2))
|
||||
#' rmvnorm(20, mu = c(3, -1, 2))
|
||||
rmvnorm <- function(n = 1, mu = rep(0, p), sigma = diag(p)) {
|
||||
if (!missing(sigma)) {
|
||||
p <- nrow(sigma)
|
||||
} else if (!missing(mu)) {
|
||||
mu <- matrix(mu, ncol = 1)
|
||||
p <- nrow(mu)
|
||||
} else {
|
||||
stop("At least one of 'mu' or 'sigma' must be supplied.")
|
||||
}
|
||||
|
||||
# See: https://en.wikipedia.org/wiki/Multivariate_normal_distribution
|
||||
return(rep(mu, each = n) + matrix(rnorm(n * p), n) %*% chol(sigma))
|
||||
}
|
||||
|
||||
#' Samples from the multivariate t distribution (Student's t distribution).
|
||||
#'
|
||||
#' @param n number of samples.
|
||||
#' @param mu mean, ... TODO:
|
||||
#' @param sigma a \eqn{k\times k} positive definite matrix. If the degrees
|
||||
#' of freedom \eqn{\nu} are bigger than 2, the covariance of the samples is
|
||||
#' \deqn{var(x) = \Sigma\frac{\nu}{\nu - 2}}
|
||||
#' for \eqn{\nu > 2}.
|
||||
#' @param df degrees of freedom \eqn{\nu}.
|
||||
#'
|
||||
#' @returns a \eqn{n\times p} matrix with samples in its rows.
|
||||
#'
|
||||
#' @examples
|
||||
#' rmvt(20, c(0, 1), matrix(c(3, 1, 1, 2), 2), 3)
|
||||
#' rmvt(20, sigma = matrix(c(2, 1, 1, 2), 2), df = 3)
|
||||
#' rmvt(20, mu = c(3, -1, 2), df = 3)
|
||||
rmvt <- function(n = 1, mu = rep(0, p), sigma = diag(p), df = Inf) {
|
||||
if (!missing(sigma)) {
|
||||
p <- nrow(sigma)
|
||||
} else if (!missing(mu)) {
|
||||
mu <- matrix(mu, ncol = 1)
|
||||
p <- nrow(mu)
|
||||
} else {
|
||||
stop("At least one of 'mu' or 'sigma' must be supplied.")
|
||||
}
|
||||
|
||||
if (df == Inf) {
|
||||
Z <- 1
|
||||
} else {
|
||||
Z <- sqrt(df / rchisq(n, df))
|
||||
}
|
||||
|
||||
return(rmvnorm(n, sigma = sigma) * Z + rep(mu, each = n))
|
||||
}
|
||||
|
||||
|
||||
#' Generalized Normal Distribution.
|
||||
#' see: https://en.wikipedia.org/wiki/Generalized_normal_distribution
|
||||
rgnorm <- function(n = 1, mu = 0, alpha = 1, beta = 1) {
|
||||
if (alpha <= 0 || beta <= 0) {
|
||||
stop("alpha and beta must be positive.")
|
||||
}
|
||||
# Draw |x - mu|^beta from a Gamma(1 / beta, scale = alpha^beta) distribution
|
||||
# via the inverse CDF, take the beta-th root, then attach a random sign.
|
||||
lambda <- (1 / alpha)^beta
|
||||
scales <- qgamma(runif(n), shape = 1 / beta, scale = 1 / lambda)^(1 / beta)
|
||||
return(scales * ((-1)^rbinom(n, 1, 0.5)) + mu)
|
||||
}
|
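A hedged usage sketch for the sampler above: with shape `beta = 2` and `alpha = sqrt(2)` the generalized normal reduces to the standard normal (and `beta = 1` gives a Laplace distribution).

    # Sketch: beta = 2, alpha = sqrt(2) should reproduce a standard normal.
    x <- rgnorm(1e5, mu = 0, alpha = sqrt(2), beta = 2)
    c(mean(x), sd(x))   # approx. (0, 1)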
||||
|
||||
#' Laplace distribution
|
||||
#' see: https://en.wikipedia.org/wiki/Laplace_distribution
|
||||
rlaplace <- function(n = 1, mu = 0, sigma = 1) {
|
||||
U <- runif(n, -0.5, 0.5)
|
||||
scale <- sigma / sqrt(2)
|
||||
|
||||
return(mu - scale * sign(U) * log(1 - 2 * abs(U)))
|
||||
}
|
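A short sketch of the parameterization used above: since the variance of a Laplace law with scale `b` is `2 * b^2`, the choice `scale = sigma / sqrt(2)` makes `sigma` the standard deviation of the generated samples.

    # Sketch: sd of the samples should match the `sigma` argument.
    x <- rlaplace(1e5, mu = 1, sigma = 2)
    c(mean(x), sd(x))   # approx. (1, 2)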
||||
|
||||
#' Generates test datasets.
|
||||
#'
|
||||
#' Provides sample datasets. There are seven different datasets, named
|
||||
#' M1 through M7; M1 to M5 are described in the paper referenced below.
|
||||
#' The general model is given by:
|
||||
#' \deqn{Y = g(B'X) + \epsilon}
|
||||
#'
|
||||
#' @param name One of \code{"M1"}, \code{"M2"}, ..., \code{"M7"}.
|
||||
#' @param n number of samples.
|
||||
#' @param p dimension of the predictor variable \code{X}.
|
||||
#' @param sigma parameter controlling the noise level and, for some models,
|
||||
#' the covariance structure of \code{X}.
|
||||
#' @param ... ignored.
|
||||
#'
|
||||
#' @return List with elements
|
||||
#' \describe{
|
||||
#' \item{X}{data}
|
||||
#' \item{Y}{response}
|
||||
#' \item{B}{Used dim-reduction matrix}
|
||||
#' \item{name}{Name of the dataset (name parameter)}
|
||||
#' }
|
||||
#'
|
||||
#' @section M1:
|
||||
#' The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace
|
||||
#' dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points.
|
||||
#' The link function \eqn{g} is given as
|
||||
#' \deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + \epsilon / 2}{%
|
||||
#' g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + epsilon / 2}
|
||||
#' @section M2:
|
||||
#' \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a
|
||||
#' default of \eqn{n = 200} data points.
|
||||
#' The link function \eqn{g} is given as
|
||||
#' \deqn{g(x) = (b_1^T X) (b_2^T X)^2 + \epsilon / 2}
|
||||
#' @section M3:
|
||||
#' \deqn{g(x) = cos(b_1^T X) + \epsilon / 2}
|
||||
#' @section M4:
|
||||
#' TODO:
|
||||
#' @section M5:
|
||||
#' TODO:
|
||||
#'
|
||||
#' @import stats
|
||||
#' @importFrom stats rnorm rbinom
|
||||
#' @export
|
||||
dataset <- function(name = "M1", n = NULL, p = 20, sigma = 0.5, ...) {
|
||||
name <- toupper(name)
|
||||
if (nchar(name) == 1) { name <- paste0("M", name) }
|
||||
|
||||
if (name == "M1") {
|
||||
if (missing(n)) { n <- 100 }
|
||||
# B ... `p x 1`
|
||||
B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1)
|
||||
X <- rmvnorm(n, sigma = sigma^abs(outer(1:p, 1:p, FUN = `-`)))
|
||||
beta <- 0.5
|
||||
Y <- cos(X %*% B) + rgnorm(n, 0,
|
||||
alpha = sqrt(0.25 * gamma(1 / beta) / gamma(3 / beta)),
|
||||
beta = beta
|
||||
)
|
||||
} else if (name == "M2") {
|
||||
if (missing(n)) { n <- 100 }
|
||||
prob <- 0.3
|
||||
lambda <- 1 # dispersion
|
||||
# B ... `p x 1`
|
||||
B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1)
|
||||
Z <- 2 * rbinom(n, 1, prob) - 1
|
||||
X <- matrix(rep(lambda * Z, p) + rnorm(n * p), n)
|
||||
Y <- cos(X %*% B) + rnorm(n, 0, sigma)
|
||||
} else if (name == "M3") {
|
||||
if (missing(n)) { n <- 200 }
|
||||
# B ... `p x 1`
|
||||
B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1)
|
||||
X <- matrix(rnorm(n * p), n)
|
||||
Y <- 1.5 * log(2 + abs(X %*% B)) + rnorm(n, 0, sigma^2)
|
||||
} else if (name == "M4") {
|
||||
if (missing(n)) { n <- 200 }
|
||||
# B ... `p x 2`
|
||||
B <- cbind(
|
||||
c(rep(1 / sqrt(6), 6), rep(0, p - 6)),
|
||||
c(rep(c(1, -1), 3) / sqrt(6), rep(0, p - 6))
|
||||
)
|
||||
X <- rmvnorm(n, sigma = sigma^abs(outer(1:p, 1:p, FUN = `-`)))
|
||||
XB <- X %*% B
|
||||
Y <- (XB[, 1]) / (0.5 + (XB[, 2] + 1.5)^2) + rnorm(n, 0, sigma^2)
|
||||
} else if (name == "M5") {
|
||||
if (missing(n)) { n <- 200 }
|
||||
# B ... `p x 2`
|
||||
B <- cbind(
|
||||
c(rep(1, 6), rep(0, p - 6)),
|
||||
c(rep(c(1, -1), 3), rep(0, p - 6))
|
||||
) / sqrt(6)
|
||||
X <- matrix(runif(n * p), n)
|
||||
XB <- X %*% B
|
||||
Y <- cos(XB[, 1] * pi) * (XB[, 2] + 1)^2 + rnorm(n, 0, sigma^2)
|
||||
} else if (name == "M6") {
|
||||
if (missing(n)) { n <- 200 }
|
||||
# B ... `p x 3`
|
||||
B <- diag(p)[, -(3:(p - 1))]
|
||||
X <- matrix(rnorm(n * p), n)
|
||||
Y <- rowSums((X %*% B)^2) + rnorm(n, 0, sigma^2)
|
||||
} else if (name == "M7") {
|
||||
if (missing(n)) { n <- 400 }
|
||||
# B ... `p x 4`
|
||||
B <- diag(p)[, -(4:(p - 1))]
|
||||
# "R"andom "M"ulti"V"ariate "S"tudent
|
||||
X <- rmvt(n = n, sigma = diag(p), df = 3)
|
||||
XB <- X %*% B
|
||||
Y <- (XB[, 1]) * (XB[, 2])^2 + (XB[, 3]) * (XB[, 4])
|
||||
Y <- Y + rlaplace(n, 0, sigma)
|
||||
} else {
|
||||
stop("Got unknown dataset name.")
|
||||
}
|
||||
|
||||
return(list(X = X, Y = Y, B = B, name = name))
|
||||
}
|
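A minimal usage sketch for the (removed) generator above, assuming the package is attached:

    # Sketch: draw dataset M1 and inspect the returned components.
    ds <- dataset("M1", n = 100, p = 20)
    dim(ds$X)      # 100 x 20
    length(ds$Y)   # 100
    dim(ds$B)      # 20 x 1 (M1 uses a rank-1 basis)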
|
@ -1,82 +0,0 @@
|
|||
#' Draws a sample from the invariant measure on the Stiefel manifold \eqn{S(p, q)}.
|
||||
#'
|
||||
#' @param p row dimension
|
||||
#' @param q col dimension
|
||||
#' @return \code{p} times \code{q} semi-orthogonal matrix.
|
||||
#' @examples
|
||||
#' V <- rStiefel(6, 4)
|
||||
#' @export
|
||||
rStiefel <- function(p, q) {
|
||||
return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))))
|
||||
}
|
||||
|
||||
#' Retraction to the manifold.
|
||||
#'
|
||||
#' @param A matrix.
|
||||
#' @return `(p, q)` semi-orthogonal matrix, aka element of the Stiefel manifold.
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
retractStiefel <- function(A) {
|
||||
return(qr.Q(qr(A)))
|
||||
}
|
||||
|
||||
#' Skew-Symmetric matrix computed from `A` as
|
||||
#' \eqn{1/2 (A - A^T)}.
|
||||
#' @param A Matrix of dim `(p, q)`
|
||||
#' @return Skew-Symmetric matrix of dim `(p, p)`.
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
skew <- function(A) {
|
||||
0.5 * (A - t(A))
|
||||
}
|
||||
|
||||
#' Symmetric matrix computed from `A` as
|
||||
#' \eqn{1/2 (A + A^T)}.
|
||||
#' @param A Matrix of dim `(p, q)`
|
||||
#' @return Symmetric matrix of dim `(p, p)`.
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
sym <- function(A) {
|
||||
0.5 * (A + t(A))
|
||||
}
|
||||
|
||||
#' Orthogonal Projection onto the tangent space of the Stiefel manifold.
|
||||
#'
|
||||
#' @param V Point on the Stiefel manifold.
|
||||
#' @param G matrix to be projected onto the tangent space at `V`.
|
||||
#' @return `(p, q)` matrix as element of the tangent space at `V`.
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
projTangentStiefel <- function(V, G) {
|
||||
Q <- diag(1, nrow(V)) - V %*% t(V)
|
||||
return(Q %*% G + V %*% skew(t(V) %*% G))
|
||||
}
|
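A brief sketch of the defining property of the projection above: for any `G`, the result `H` satisfies `t(V) %*% H + t(H) %*% V = 0`, i.e. it lies in the tangent space at `V`.

    # Sketch: verify tangency of the projected matrix.
    V <- qr.Q(qr(matrix(rnorm(5 * 2), 5, 2)))
    G <- matrix(rnorm(5 * 2), 5, 2)
    H <- projTangentStiefel(V, G)
    round(t(V) %*% H + t(H) %*% V, 12)   # approx. zero (2 x 2)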
||||
|
||||
#' Null space basis of a given matrix `V`
|
||||
#'
|
||||
#' @param V `(p, q)` matrix
|
||||
#' @return Semi-orthogonal `(p, p - q)` matrix spanning the null space of `V`.
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
null <- function(V) {
|
||||
tmp <- qr(V)
|
||||
set <- if(tmp$rank == 0L) seq_len(ncol(V)) else -seq_len(tmp$rank)
|
||||
return(qr.Q(tmp, complete = TRUE)[, set, drop = FALSE])
|
||||
}
|
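And a sketch of the property promised by the return value above: the columns of `null(V)` are orthogonal to the columns of `V`.

    # Sketch: t(N) %*% V should vanish for N = null(V).
    V <- qr.Q(qr(matrix(rnorm(5 * 2), 5, 2)))
    N <- null(V)
    dim(N)                  # 5 x 3
    round(t(N) %*% V, 12)   # approx. zero (3 x 2)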
||||
|
||||
#' Creates a (numeric) matrix where each column contains
|
||||
#' an element to element matching.
|
||||
#' @param elements numeric vector of elements to match
|
||||
#' @return matrix of size `(2, n * (n - 1) / 2)` for an argument of length `n`.
|
||||
#' @keywords internal
|
||||
#' @examples
|
||||
#' elem.pairs(seq.int(2, 5))
|
||||
#' @export
|
||||
elem.pairs <- function(elements) {
|
||||
# Number of elements to match.
|
||||
n <- length(elements)
|
||||
# Create all combinations.
|
||||
pairs <- rbind(rep(elements, each=n), rep(elements, n))
|
||||
# Select unique combinations without self interaction.
|
||||
return(pairs[, pairs[1, ] < pairs[2, ]])
|
||||
}
|
|
@ -1,68 +0,0 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/datasets.R
|
||||
\name{dataset}
|
||||
\alias{dataset}
|
||||
\title{Generates test datasets.}
|
||||
\usage{
|
||||
dataset(name = "M1", n, B, p.mix = 0.3, lambda = 1)
|
||||
}
|
||||
\arguments{
|
||||
\item{name}{One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"} or \code{"M5"}}
|
||||
|
||||
\item{n}{number of samples}
|
||||
|
||||
\item{B}{SDR basis used for dataset creation if supplied.}
|
||||
|
||||
\item{p.mix}{Only for \code{"M4"}, see: below.}
|
||||
|
||||
\item{lambda}{Only for \code{"M4"}, see: below.}
|
||||
|
||||
\item{p}{Dim. of random variable \code{X}.}
|
||||
}
|
||||
\value{
|
||||
List with elements
|
||||
\describe{
|
||||
\item{X}{data}
|
||||
\item{Y}{response}
|
||||
\item{B}{Used dim-reduction matrix}
|
||||
\item{name}{Name of the dataset (name parameter)}
|
||||
}
|
||||
}
|
||||
\description{
|
||||
Provides sample datasets. There are 5 different datasets named
|
||||
M1, M2, M3, M4 and M5 described in the paper referenced below.
|
||||
The general model is given by:
|
||||
\deqn{Y = g(B'X) + \epsilon}
|
||||
}
|
||||
\section{M1}{
|
||||
|
||||
The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace
|
||||
dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points.
|
||||
The link function \eqn{g} is given as
|
||||
\deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + \epsilon / 2}{%
|
||||
g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + epsilon / 2}
|
||||
}
|
||||
|
||||
\section{M2}{
|
||||
|
||||
\eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a
|
||||
default of \eqn{n = 200} data points.
|
||||
The link function \eqn{g} is given as
|
||||
\deqn{g(x) = (b_1^T X) (b_2^T X)^2 + \epsilon / 2}
|
||||
}
|
||||
|
||||
\section{M3}{
|
||||
|
||||
\deqn{g(x) = cos(b_1^T X) + \epsilon / 2}
|
||||
}
|
||||
|
||||
\section{M4}{
|
||||
|
||||
TODO:
|
||||
}
|
||||
|
||||
\section{M5}{
|
||||
|
||||
TODO:
|
||||
}
|
||||
|
|
@ -1,23 +0,0 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/util.R
|
||||
\name{elem.pairs}
|
||||
\alias{elem.pairs}
|
||||
\title{Creates a (numeric) matrix where each column contains
|
||||
an element to element matching.}
|
||||
\usage{
|
||||
elem.pairs(elements)
|
||||
}
|
||||
\arguments{
|
||||
\item{elements}{numeric vector of elements to match}
|
||||
}
|
||||
\value{
|
||||
matrix of size `(2, n * (n - 1) / 2)` for an argument of length `n`.
|
||||
}
|
||||
\description{
|
||||
Creates a (numeric) matrix where each column contains
|
||||
an element to element matching.
|
||||
}
|
||||
\examples{
|
||||
elem.pairs(seq.int(2, 5))
|
||||
}
|
||||
\keyword{internal}
|
|
@ -1,20 +0,0 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/util.R
|
||||
\name{projTangentStiefel}
|
||||
\alias{projTangentStiefel}
|
||||
\title{Orthogonal Projection onto the tangent space of the Stiefel manifold.}
|
||||
\usage{
|
||||
projTangentStiefel(V, G)
|
||||
}
|
||||
\arguments{
|
||||
\item{V}{Point on the Stiefel manifold.}
|
||||
|
||||
\item{G}{matrix to be projected onto the tangent space at `V`.}
|
||||
}
|
||||
\value{
|
||||
`(p, q)` matrix as element of the tangent space at `V`.
|
||||
}
|
||||
\description{
|
||||
Orthogonal Projection onto the tangent space of the Stiefel manifold.
|
||||
}
|
||||
\keyword{internal}
|
|
@ -1,18 +0,0 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/util.R
|
||||
\name{retractStiefel}
|
||||
\alias{retractStiefel}
|
||||
\title{Retraction to the manifold.}
|
||||
\usage{
|
||||
retractStiefel(A)
|
||||
}
|
||||
\arguments{
|
||||
\item{A}{matrix.}
|
||||
}
|
||||
\value{
|
||||
`(p, q)` semi-orthogonal matrix, aka element of the Stiefel manifold.
|
||||
}
|
||||
\description{
|
||||
Retraction to the manifold.
|
||||
}
|
||||
\keyword{internal}
|
|
@ -1,20 +0,0 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/util.R
|
||||
\name{skew}
|
||||
\alias{skew}
|
||||
\title{Skew-Symmetric matrix computed from `A` as
|
||||
\eqn{1/2 (A - A^T)}.}
|
||||
\usage{
|
||||
skew(A)
|
||||
}
|
||||
\arguments{
|
||||
\item{A}{Matrix of dim `(p, q)`}
|
||||
}
|
||||
\value{
|
||||
Skew-Symmetric matrix of dim `(p, p)`.
|
||||
}
|
||||
\description{
|
||||
Skew-Symmetric matrix computed from `A` as
|
||||
\eqn{1/2 (A - A^T)}.
|
||||
}
|
||||
\keyword{internal}
|
|
@ -1,20 +0,0 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/util.R
|
||||
\name{sym}
|
||||
\alias{sym}
|
||||
\title{Symmetric matrix computed from `A` as
|
||||
\eqn{1/2 (A + A^T)}.}
|
||||
\usage{
|
||||
sym(A)
|
||||
}
|
||||
\arguments{
|
||||
\item{A}{Matrix of dim `(p, q)`}
|
||||
}
|
||||
\value{
|
||||
Symmetric matrix of dim `(p, p)`.
|
||||
}
|
||||
\description{
|
||||
Symmetric matrix computed from `A` as
|
||||
\eqn{1/2 (A + A^T)}.
|
||||
}
|
||||
\keyword{internal}
|
|
@ -1,11 +0,0 @@
|
|||
Package: CVEpureR
|
||||
Type: Package
|
||||
Title: Conditional Variance Estimator for Sufficient Dimension Reduction
|
||||
Version: 0.1
|
||||
Date: 2019-08-29
|
||||
Author: Loki
|
||||
Maintainer: Loki <loki@no.mail>
|
||||
Description: Implementation of the Conditional Variance Estimation (CVE) method. This package version is written in pure R.
|
||||
License: GPL-3
|
||||
Encoding: UTF-8
|
||||
RoxygenNote: 6.1.1
|
|
@ -1,23 +0,0 @@
|
|||
# Generated by roxygen2: do not edit by hand
|
||||
|
||||
S3method(plot,cve)
|
||||
S3method(summary,cve)
|
||||
export(cve)
|
||||
export(cve.call)
|
||||
export(cve.grid.search)
|
||||
export(cve_linesearch)
|
||||
export(cve_sgd)
|
||||
export(cve_simple)
|
||||
export(dataset)
|
||||
export(elem.pairs)
|
||||
export(estimate.bandwidth)
|
||||
export(grad)
|
||||
export(null)
|
||||
export(rStiefl)
|
||||
import(stats)
|
||||
importFrom(graphics,lines)
|
||||
importFrom(graphics,plot)
|
||||
importFrom(graphics,points)
|
||||
importFrom(stats,model.frame)
|
||||
importFrom(stats,rbinom)
|
||||
importFrom(stats,rnorm)
|
265
CVE_R/R/CVE.R
|
@ -1,265 +0,0 @@
|
|||
#' Conditional Variance Estimator (CVE)
|
||||
#'
|
||||
#' Conditional Variance Estimator for Sufficient Dimension
|
||||
#' Reduction
|
||||
#'
|
||||
#' TODO: And some details
|
||||
#'
|
||||
#'
|
||||
#' @references Fertl Lukas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
|
||||
#'
|
||||
#' @docType package
|
||||
#' @author Loki
|
||||
#' @useDynLib CVE, .registration = TRUE
|
||||
"_PACKAGE"
|
||||
|
||||
#' Implementation of the CVE method.
|
||||
#'
|
||||
#' Conditional Variance Estimator (CVE) is a novel sufficient dimension
|
||||
#' reduction (SDR) method assuming a model
|
||||
#' \deqn{Y \sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon}
|
||||
#' where B'X is a lower dimensional projection of the predictors.
|
||||
#'
|
||||
#' @param formula Formula for the regression model defining `X`, `Y`.
|
||||
#' See: \code{\link{formula}}.
|
||||
#' @param data data.frame holding data for formula.
|
||||
#' @param method The methods differ only in the optimization used.
|
||||
#' All of them are gradient-based optimizations on the Stiefel manifold.
|
||||
#' \itemize{
|
||||
#' \item "simple" Simple reduction of stepsize.
|
||||
#' \item "sgd" stocastic gradient decent.
|
||||
#' \item TODO: further
|
||||
#' }
|
||||
#' @param ... Further parameters depending on the used method.
|
||||
#' @examples
|
||||
#' library(CVE)
|
||||
#'
|
||||
#' # sample dataset
|
||||
#' ds <- dataset("M5")
|
||||
#'
|
||||
#' # call `cve` with default method (aka "simple")
|
||||
#' dr.simple <- cve(ds$Y ~ ds$X, k = ncol(ds$B))
|
||||
#' # plot optimization history (loss via iteration)
|
||||
#' plot(dr.simple, main = "CVE M5 simple")
|
||||
#'
|
||||
#' # call `cve` with method "linesearch" using `data.frame` as data.
|
||||
#' data <- data.frame(Y = ds$Y, X = ds$X)
|
||||
#' # Note: `Y, X` are NOT defined, they are extracted from `data`.
|
||||
#' dr.linesearch <- cve(Y ~ ., data, method = "linesearch", k = ncol(ds$B))
|
||||
#' plot(dr.linesearch, main = "CVE M5 linesearch")
|
||||
#'
|
||||
#' @references Fertl L., Bura E. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
|
||||
#'
|
||||
#' @seealso \code{\link{formula}}. For a complete parameters list (dependent on
|
||||
#' the method) see \code{\link{cve_simple}}, \code{\link{cve_sgd}}
|
||||
#' @import stats
|
||||
#' @importFrom stats model.frame
|
||||
#' @export
|
||||
cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
|
||||
# check for type of `data` if supplied and set default
|
||||
if (missing(data)) {
|
||||
data <- environment(formula)
|
||||
} else if (!is.data.frame(data)) {
|
||||
stop("Parameter 'data' must be a 'data.frame' or missing.")
|
||||
}
|
||||
|
||||
# extract `X`, `Y` from `formula` with `data`
|
||||
model <- stats::model.frame(formula, data)
|
||||
X <- as.matrix(model[ ,-1L, drop = FALSE])
|
||||
Y <- as.double(model[ , 1L])
|
||||
|
||||
# pass extracted data on to [cve.call()]
|
||||
dr <- cve.call(X, Y, method = method, max.dim = max.dim, ...)
|
||||
|
||||
# overwrite `call` property from [cve.call()]
|
||||
dr$call <- match.call()
|
||||
return(dr)
|
||||
}
|
||||
|
||||
#' @param X data matrix.
|
||||
#' @param Y responses.
|
||||
#' @param nObs parameter for bandwidth estimation, as described in the paper.
|
||||
#' @param k guess for SDR dimension.
|
||||
#' @param ... Method specific parameters.
|
||||
#' @rdname cve
|
||||
#' @export
|
||||
cve.call <- function(X, Y, method = "simple",
|
||||
nObs = sqrt(nrow(X)), h = NULL,
|
||||
min.dim = 1L, max.dim = 10L, k = NULL,
|
||||
tau = 1.0, tol = 1e-3,
|
||||
epochs = 50L, attempts = 10L,
|
||||
logger = NULL) {
|
||||
|
||||
# parameter checking
|
||||
if (!(is.matrix(X) && is.numeric(X))) {
|
||||
stop("Parameter 'X' should be a numeric matrices.")
|
||||
}
|
||||
if (!is.numeric(Y)) {
|
||||
stop("Parameter 'Y' must be numeric.")
|
||||
}
|
||||
if (is.matrix(Y) || !is.double(Y)) {
|
||||
Y <- as.double(Y)
|
||||
}
|
||||
if (nrow(X) != length(Y)) {
|
||||
stop("Rows of 'X' and 'Y' elements are not compatible.")
|
||||
}
|
||||
if (ncol(X) < 2) {
|
||||
stop("'X' is one dimensional, no need for dimension reduction.")
|
||||
}
|
||||
|
||||
if (missing(k) || is.null(k)) {
|
||||
min.dim <- as.integer(min.dim)
|
||||
max.dim <- as.integer(min(max.dim, ncol(X) - 1L))
|
||||
} else {
|
||||
min.dim <- as.integer(k)
|
||||
max.dim <- as.integer(k)
|
||||
}
|
||||
if (min.dim > max.dim) {
|
||||
stop("'min.dim' bigger 'max.dim'.")
|
||||
}
|
||||
if (max.dim >= ncol(X)) {
|
||||
stop("'max.dim' (or 'k') must be smaller than 'ncol(X)'.")
|
||||
}
|
||||
|
||||
if (is.function(h)) {
|
||||
estimate.bandwidth <- h
|
||||
h <- NULL
|
||||
}
|
||||
|
||||
if (!is.numeric(tau) || length(tau) > 1L || tau <= 0.0) {
|
||||
stop("Initial step-width 'tau' must be positive number.")
|
||||
} else {
|
||||
tau <- as.double(tau)
|
||||
}
|
||||
if (!is.numeric(tol) || length(tol) > 1L || tol < 0.0) {
|
||||
stop("Break condition tolerance 'tol' must be not negative number.")
|
||||
} else {
|
||||
tol <- as.double(tol)
|
||||
}
|
||||
|
||||
if (!is.numeric(epochs) || length(epochs) > 1L) {
|
||||
stop("Parameter 'epochs' must be positive integer.")
|
||||
} else if (!is.integer(epochs)) {
|
||||
epochs <- as.integer(epochs)
|
||||
}
|
||||
if (epochs < 1L) {
|
||||
stop("Parameter 'epochs' must be at least 1L.")
|
||||
}
|
||||
if (!is.numeric(attempts) || length(attempts) > 1L) {
|
||||
stop("Parameter 'attempts' must be positive integer.")
|
||||
} else if (!is.integer(attempts)) {
|
||||
attempts <- as.integer(attempts)
|
||||
}
|
||||
if (attempts < 1L) {
|
||||
stop("Parameter 'attempts' must be at least 1L.")
|
||||
}
|
||||
|
||||
if (is.function(logger)) {
|
||||
loggerEnv <- environment(logger)
|
||||
} else {
|
||||
loggerEnv <- NULL
|
||||
}
|
||||
|
||||
# Call specified method.
|
||||
method <- tolower(method)
|
||||
call <- match.call()
|
||||
dr <- list()
|
||||
for (k in min.dim:max.dim) {
|
||||
|
||||
if (missing(h) || is.null(h)) {
|
||||
h <- estimate.bandwidth(X, k, nObs)
|
||||
} else if (is.numeric(h) && h > 0.0) {
|
||||
h <- as.double(h)
|
||||
} else {
|
||||
stop("Bandwidth 'h' must be positive numeric.")
|
||||
}
|
||||
|
||||
if (method == 'simple') {
|
||||
dr.k <- .Call('cve_simple', PACKAGE = 'CVE',
|
||||
X, Y, k, h,
|
||||
tau, tol,
|
||||
epochs, attempts,
|
||||
logger, loggerEnv)
|
||||
# dr.k <- cve_simple(X, Y, k, nObs = nObs, ...)
|
||||
# } else if (method == 'linesearch') {
|
||||
# dr.k <- cve_linesearch(X, Y, k, nObs = nObs, ...)
|
||||
# } else if (method == 'rcg') {
|
||||
# dr.k <- cve_rcg(X, Y, k, nObs = nObs, ...)
|
||||
# } else if (method == 'momentum') {
|
||||
# dr.k <- cve_momentum(X, Y, k, nObs = nObs, ...)
|
||||
# } else if (method == 'rmsprob') {
|
||||
# dr.k <- cve_rmsprob(X, Y, k, nObs = nObs, ...)
|
||||
# } else if (method == 'sgdrmsprob') {
|
||||
# dr.k <- cve_sgdrmsprob(X, Y, k, nObs = nObs, ...)
|
||||
# } else if (method == 'sgd') {
|
||||
# dr.k <- cve_sgd(X, Y, k, nObs = nObs, ...)
|
||||
} else {
|
||||
stop('Got unknown method.')
|
||||
}
|
||||
dr.k$B <- null(dr.k$V)
|
||||
dr.k$loss <- mean(dr.k$L)
|
||||
dr.k$h <- h
|
||||
dr.k$k <- k
|
||||
class(dr.k) <- "cve.k"
|
||||
dr[[k]] <- dr.k
|
||||
}
|
||||
|
||||
# augment result information
|
||||
dr$method <- method
|
||||
dr$call <- call
|
||||
class(dr) <- "cve"
|
||||
return(dr)
|
||||
}
|
||||
|
||||
#' Plotting helper for objects of class \code{cve}.
|
||||
#'
|
||||
#' @param x Object of class \code{cve} (result of [cve()]).
|
||||
#' @param content Specifies what to plot:
|
||||
#' \itemize{
|
||||
#' \item "history" Plots the loss history from stiefel optimization
|
||||
#' (default).
|
||||
#' \item ... TODO: add (if there are any)
|
||||
#' }
|
||||
#' @param ... Pass through parameters to [plot()] and [lines()]
|
||||
#'
|
||||
#' @usage ## S3 method for class 'cve'
|
||||
#' plot(x, content = "history", ...)
|
||||
#' @seealso see \code{\link{par}} for graphical parameters to pass through
|
||||
#' as well as \code{\link{plot}} for standard plot utility.
|
||||
#' @importFrom graphics plot lines points
|
||||
#' @method plot cve
|
||||
#' @export
|
||||
plot.cve <- function(x, ...) {
|
||||
L <- c()
|
||||
k <- c()
|
||||
for (dr.k in x) {
|
||||
if (inherits(dr.k, 'cve.k')) {
|
||||
k <- c(k, paste0(dr.k$k))
|
||||
L <- c(L, dr.k$L)
|
||||
}
|
||||
}
|
||||
L <- matrix(L, ncol = length(k))
|
||||
boxplot(L, main = "Loss ...",
|
||||
xlab = "SDR dimension k",
|
||||
ylab = expression(L(V, X[i])),
|
||||
names = k)
|
||||
}
|
||||
|
||||
#' Prints a summary of a \code{cve} result.
|
||||
#' @param object Instance of 'cve' as returned by \code{cve}.
|
||||
#' @method summary cve
|
||||
#' @export
|
||||
summary.cve <- function(object, ...) {
|
||||
cat('Summary of CVE result - Method: "', object$method, '"\n',
|
||||
'\n',
|
||||
'Dataset size: ', nrow(object$X), '\n',
|
||||
'Data Dimension: ', ncol(object$X), '\n',
|
||||
'SDR Dimension: ', object$k, '\n',
|
||||
'loss: ', object$loss, '\n',
|
||||
'\n',
|
||||
'Called via:\n',
|
||||
' ',
|
||||
sep='')
|
||||
print(object$call)
|
||||
}
|
|
@ -1,169 +0,0 @@
|
|||
#' Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe
|
||||
#' conditions.
|
||||
#'
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
cve_linesearch <- function(X, Y, k,
|
||||
nObs = sqrt(nrow(X)),
|
||||
h = NULL,
|
||||
tau = 1.0,
|
||||
tol = 1e-3,
|
||||
rho1 = 0.1,
|
||||
rho2 = 0.9,
|
||||
slack = 0,
|
||||
epochs = 50L,
|
||||
attempts = 10L,
|
||||
max.linesearch.iter = 10L,
|
||||
logger = NULL
|
||||
) {
|
||||
# Set the environment of `grad` to this environment so that `grad` can
|
||||
# find, and also modify, the local variables defined below.
|
||||
environment(grad) <- environment()
|
||||
|
||||
# Get dimensions.
|
||||
n <- nrow(X)
|
||||
p <- ncol(X)
|
||||
q <- p - k
|
||||
|
||||
# Save initial learning rate `tau`.
|
||||
tau.init <- tau
|
||||
# Adapt tolerance for the break condition.
|
||||
tol <- sqrt(2 * q) * tol
|
||||
|
||||
# Estimate the bandwidth if not given.
|
||||
if (missing(h) || !is.numeric(h)) {
|
||||
h <- estimate.bandwidth(X, k, nObs)
|
||||
}
|
||||
|
||||
# Compute persistent data.
|
||||
# Compute lookup indexes for symmetry, lower/upper
|
||||
# triangular parts and vectorization.
|
||||
pair.index <- elem.pairs(seq(n))
|
||||
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
|
||||
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
|
||||
# Matrix of vectorized indices. (vec(index) -> seq)
|
||||
index <- matrix(seq(n * n), n, n)
|
||||
lower <- index[lower.tri(index)]
|
||||
upper <- t(index)[lower]
|
||||
|
||||
# Create all pairwise differences of rows of `X`.
|
||||
X_diff <- X[i, , drop = F] - X[j, , drop = F]
|
||||
# Identity matrix.
|
||||
I_p <- diag(1, p)
|
||||
|
||||
# Init tracking of the current best result (across multiple attempts).
|
||||
V.best <- NULL
|
||||
loss.best <- Inf
|
||||
|
||||
# Start loop for multiple attempts.
|
||||
for (attempt in 1:attempts) {
|
||||
|
||||
# Sample a `(p, q)` dimensional matrix from the Stiefel manifold as
|
||||
# optimization start value.
|
||||
V <- rStiefl(p, q)
|
||||
|
||||
# Initial loss and gradient.
|
||||
loss <- Inf
|
||||
G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
|
||||
# Set last loss (aka, loss after applying the step).
|
||||
loss.last <- loss
|
||||
|
||||
# Call logger with initial values before starting optimization.
|
||||
if (is.function(logger)) {
|
||||
epoch <- 0 # Set epoch count to 0 (only relevant for logging).
|
||||
error <- NA
|
||||
logger(environment())
|
||||
}
|
||||
|
||||
## Start optimization loop.
|
||||
for (epoch in 1:epochs) {
|
||||
|
||||
# Cayley transform matrix `A`
|
||||
A <- (G %*% t(V)) - (V %*% t(G))
|
||||
|
||||
# Directional derivative of the loss at current position, given
|
||||
# as `Tr(G^T \cdot A \cdot V)`.
|
||||
loss.prime <- -0.5 * norm(A, type = 'F')^2
|
||||
|
||||
# Linesearch
|
||||
tau.upper <- Inf
|
||||
tau.lower <- 0
|
||||
tau <- tau.init
|
||||
for (iter in 1:max.linesearch.iter) {
|
||||
# Apply learning rate `tau`.
|
||||
A.tau <- (tau / 2) * A
|
||||
# Parallel transport (on the Stiefel manifold) in the direction of `G`.
|
||||
inv <- solve(I_p + A.tau)
|
||||
V.tau <- inv %*% ((I_p - A.tau) %*% V)
|
||||
|
||||
# Loss at position after a step.
|
||||
loss <- Inf # aka loss.tau
|
||||
G.tau <- grad(X, Y, V.tau, h, loss.out = TRUE, persistent = TRUE)
|
||||
|
||||
# Armijo condition.
|
||||
if (loss > loss.last + (rho1 * tau * loss.prime)) {
|
||||
tau.upper <- tau
|
||||
tau <- (tau.lower + tau.upper) / 2
|
||||
next()
|
||||
}
|
||||
|
||||
V.prime.tau <- -0.5 * inv %*% A %*% (V + V.tau)
|
||||
loss.prime.tau <- sum(G * V.prime.tau) # Tr(grad(tau)^T \cdot Y^'(tau))
|
||||
|
||||
# Wolfe condition.
|
||||
if (loss.prime.tau < rho2 * loss.prime) {
|
||||
tau.lower <- tau
|
||||
if (tau.upper == Inf) {
|
||||
tau <- 2 * tau.lower
|
||||
} else {
|
||||
tau <- (tau.lower + tau.upper) / 2
|
||||
}
|
||||
} else {
|
||||
break()
|
||||
}
|
||||
}
|
||||
|
||||
# Compute error.
|
||||
error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
|
||||
|
||||
# Check break condition (epoch check to skip ignored gradient calc).
|
||||
# Note: the division by `sqrt(2 * q)` is included in `tol`.
|
||||
if (error < tol || epoch >= epochs) {
|
||||
# take last step and stop optimization.
|
||||
V <- V.tau
|
||||
# Final call to the logger before stopping optimization
|
||||
if (is.function(logger)) {
|
||||
G <- G.tau
|
||||
logger(environment())
|
||||
}
|
||||
break()
|
||||
}
|
||||
|
||||
# Perform the step and remember previous loss.
|
||||
V <- V.tau
|
||||
loss.last <- loss
|
||||
G <- G.tau
|
||||
|
||||
# Log after taking current step.
|
||||
if (is.function(logger)) {
|
||||
logger(environment())
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
# Check if current attempt improved previous ones
|
||||
if (loss < loss.best) {
|
||||
loss.best <- loss
|
||||
V.best <- V
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return(list(
|
||||
loss = loss.best,
|
||||
V = V.best,
|
||||
B = null(V.best),
|
||||
h = h
|
||||
))
|
||||
}
|
|
@ -1,139 +0,0 @@
|
|||
#' Implementation of the CVE method as a Riemannian Conjugate Gradient method.
|
||||
#'
|
||||
#' @references A Riemannian Conjugate Gradient Algorithm with Implicit Vector
|
||||
#' Transport for Optimization on the Stiefel Manifold
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
cve_momentum <- function(X, Y, k,
|
||||
nObs = sqrt(nrow(X)),
|
||||
h = NULL,
|
||||
tau = 1.0,
|
||||
tol = 1e-4,
|
||||
rho = 0.1, # Momentum update.
|
||||
slack = 0,
|
||||
epochs = 50L,
|
||||
attempts = 10L,
|
||||
logger = NULL
|
||||
) {
|
||||
# Set the environment of `grad` to this environment so that `grad` can
|
||||
# find, and also modify, the local variables defined below.
|
||||
environment(grad) <- environment()
|
||||
|
||||
# Get dimensions.
|
||||
n <- nrow(X) # Number of samples.
|
||||
p <- ncol(X) # Data dimensions
|
||||
q <- p - k # Complement dimension of the SDR space.
|
||||
|
||||
# Save initial learning rate `tau`.
|
||||
tau.init <- tau
|
||||
# Adapt tolerance for the break condition.
|
||||
tol <- sqrt(2 * q) * tol
|
||||
|
||||
# Estimate the bandwidth if not given.
|
||||
if (missing(h) || !is.numeric(h)) {
|
||||
h <- estimate.bandwidth(X, k, nObs)
|
||||
}
|
||||
|
||||
# Compute persistent data.
|
||||
# Compute lookup indexes for symmetry, lower/upper
|
||||
# triangular parts and vectorization.
|
||||
pair.index <- elem.pairs(seq(n))
|
||||
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
|
||||
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
|
||||
# Index of vectorized matrix, for lower and upper triangular part.
|
||||
lower <- ((i - 1) * n) + j
|
||||
upper <- ((j - 1) * n) + i
|
||||
|
||||
# Create all pairwise differences of rows of `X`.
|
||||
X_diff <- X[i, , drop = F] - X[j, , drop = F]
|
||||
# Identity matrix.
|
||||
I_p <- diag(1, p)
|
||||
|
||||
# Init tracking of the current best result (across multiple attempts).
|
||||
V.best <- NULL
|
||||
loss.best <- Inf
|
||||
|
||||
# Start loop for multiple attempts.
|
||||
for (attempt in 1:attempts) {
|
||||
# Reset learning rate `tau`.
|
||||
tau <- tau.init
|
||||
|
||||
# Sample a `(p, q)` dimensional matrix from the Stiefel manifold as
|
||||
# optimization start value.
|
||||
V <- rStiefl(p, q)
|
||||
|
||||
# Initial loss and gradient.
|
||||
loss <- Inf
|
||||
G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
|
||||
# Set last loss (aka, loss after applying the step).
|
||||
loss.last <- loss
|
||||
|
||||
# Call logger with initial values before starting optimization.
|
||||
if (is.function(logger)) {
|
||||
epoch <- 0 # Set epoch count to 0 (only relevant for logging).
|
||||
error <- NA
|
||||
logger(environment())
|
||||
}
|
||||
|
||||
M <- matrix(0, p, q)
|
||||
## Start optimization loop.
|
||||
for (epoch in 1:epochs) {
|
||||
# Apply learning rate `tau`.
|
||||
A <- projTangentStiefl(V, G)
|
||||
# Momentum update.
|
||||
M <- A + rho * projTangentStiefl(V, M)
|
||||
# Parallel transport (on the Stiefel manifold) in the direction of `G`.
|
||||
V.tau <- retractStiefl(V - tau * M)
|
||||
|
||||
# Loss at position after a step.
|
||||
loss <- grad(X, Y, V.tau, h, loss.only = TRUE, persistent = TRUE)
|
||||
|
||||
# Check if the step is appropriate; if not, reduce the learning rate.
|
||||
if ((loss - loss.last) > slack * loss.last) {
|
||||
tau <- tau / 2
|
||||
next() # Keep position and try with smaller `tau`.
|
||||
}
|
||||
|
||||
# Compute error.
|
||||
error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
|
||||
|
||||
# Check break condition (epoch check to skip ignored gradient calc).
|
||||
# Note: the division by `sqrt(2 * q)` is included in `tol`.
|
||||
if (error < tol || epoch >= epochs) {
|
||||
# take last step and stop optimization.
|
||||
V <- V.tau
|
||||
# Call the logger a last time before stopping.
|
||||
if (is.function(logger)) {
|
||||
logger(environment())
|
||||
}
|
||||
break()
|
||||
}
|
||||
|
||||
# Perform the step and remember previous loss.
|
||||
V <- V.tau
|
||||
loss.last <- loss
|
||||
|
||||
# Call logger after taking a step.
|
||||
if (is.function(logger)) {
|
||||
logger(environment())
|
||||
}
|
||||
|
||||
# Compute gradient at new position.
|
||||
G <- grad(X, Y, V, h, persistent = TRUE)
|
||||
}
|
||||
|
||||
# Check if current attempt improved previous ones
|
||||
if (loss < loss.best) {
|
||||
loss.best <- loss
|
||||
V.best <- V
|
||||
}
|
||||
}
|
||||
|
||||
return(list(
|
||||
loss = loss.best,
|
||||
V = V.best,
|
||||
B = null(V.best),
|
||||
h = h
|
||||
))
|
||||
}
|
|
@ -1,179 +0,0 @@
|
|||
#' Implementation of the CVE method as a Riemannian Conjugate Gradient method.
|
||||
#'
|
||||
#' @references A Riemannian Conjugate Gradient Algorithm with Implicit Vector
|
||||
#' Transport for Optimization on the Stiefel Manifold
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
cve_rcg <- function(X, Y, k,
|
||||
nObs = sqrt(nrow(X)),
|
||||
h = NULL,
|
||||
tau = 1.0,
|
||||
tol = 1e-4,
|
||||
rho = 1e-4, # For Armijo condition.
|
||||
slack = 0,
|
||||
epochs = 50L,
|
||||
attempts = 10L,
|
||||
max.linesearch.iter = 20L,
|
||||
logger = NULL
|
||||
) {
|
||||
# Set the environment of `grad` to this environment so that `grad` can
|
||||
# find, and also modify, the local variables defined below.
|
||||
environment(grad) <- environment()
|
||||
|
||||
# Get dimensions.
|
||||
n <- nrow(X) # Number of samples.
|
||||
p <- ncol(X) # Data dimensions
|
||||
q <- p - k # Complement dimension of the SDR space.
|
||||
|
||||
# Save initial learning rate `tau`.
|
||||
tau.init <- tau
|
||||
# Adapt tolerance for the break condition.
|
||||
tol <- sqrt(2 * q) * tol
|
||||
|
||||
# Estimate the bandwidth if not given.
|
||||
if (missing(h) || !is.numeric(h)) {
|
||||
h <- estimate.bandwidth(X, k, nObs)
|
||||
}
|
||||
|
||||
# Compute persistent data.
|
||||
# Compute lookup indexes for symmetry, lower/upper
|
||||
# triangular parts and vectorization.
|
||||
pair.index <- elem.pairs(seq(n))
|
||||
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
|
||||
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
|
||||
# Index of vectorized matrix, for lower and upper triangular part.
|
||||
lower <- ((i - 1) * n) + j
|
||||
upper <- ((j - 1) * n) + i
|
||||
|
||||
# Create all pairwise differences of rows of `X`.
|
||||
X_diff <- X[i, , drop = F] - X[j, , drop = F]
|
||||
# Identity matrix.
|
||||
I_p <- diag(1, p)
|
||||
|
||||
# Init tracking of the current best result (across multiple attempts).
|
||||
V.best <- NULL
|
||||
loss.best <- Inf
|
||||
|
||||
# Start loop for multiple attempts.
|
||||
for (attempt in 1:attempts) {
|
||||
# Reset learning rate `tau`.
|
||||
tau <- tau.init
|
||||
|
||||
# Sample a `(p, q)` dimensional matrix from the Stiefel manifold as
|
||||
# optimization start value.
|
||||
V <- rStiefl(p, q)
|
||||
|
||||
# Initial loss and gradient.
|
||||
loss <- Inf
|
||||
G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
|
||||
# Set last loss (aka, loss after applying the step).
|
||||
loss.last <- loss
|
||||
|
||||
# Cayley transform matrix `A`
|
||||
A <- (G %*% t(V)) - (V %*% t(G))
|
||||
A.last <- A
|
||||
|
||||
W <- -A
|
||||
Z <- W %*% V
|
||||
|
||||
# Compute directional derivative.
|
||||
loss.prime <- sum(G * Z) # Tr(G^T Z)
|
||||
|
||||
# Call logger with initial values before starting optimization.
|
||||
if (is.function(logger)) {
|
||||
epoch <- 0 # Set epoch count to 0 (only relevant for logging).
|
||||
error <- NA
|
||||
logger(environment())
|
||||
}
|
||||
|
||||
## Start optimization loop.
|
||||
for (epoch in 1:epochs) {
|
||||
# New directional derivative.
|
||||
loss.prime <- sum(G * Z)
|
||||
|
||||
# Reset `tau` for step-size selection.
|
||||
tau <- tau.init
|
||||
for (iter in 1:max.linesearch.iter) {
|
||||
V.tau <- retractStiefl(V + tau * Z)
|
||||
# Loss at position after a step.
|
||||
loss <- grad(X, Y, V.tau, h,
|
||||
loss.only = TRUE, persistent = TRUE)
|
||||
# Check Armijo condition.
|
||||
if (loss <= loss.last + (rho * tau * loss.prime)) {
|
||||
break() # If fulfilled, stop the linesearch.
|
||||
}
|
||||
# Reduce step-size and continue linesearch.
|
||||
tau <- tau / 2
|
||||
}
|
||||
|
||||
# Compute error.
|
||||
error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
|
||||
|
||||
# Perform step with found step-size
|
||||
V <- V.tau
|
||||
loss.last <- loss
|
||||
|
||||
# Call logger.
|
||||
if (is.function(logger)) {
|
||||
logger(environment())
|
||||
}
|
||||
|
||||
# Check break condition.
|
||||
# Note: the division by `sqrt(2 * q)` is included in `tol`.
|
||||
if (error < tol) {
|
||||
break()
|
||||
}
|
||||
|
||||
# Compute Gradient at new position.
|
||||
G <- grad(X, Y, V, h, persistent = TRUE)
|
||||
# Store last `A` for `beta` computation.
|
||||
A.last <- A
|
||||
# Cayley transform matrix `A`
|
||||
A <- (G %*% t(V)) - (V %*% t(G))
|
||||
|
||||
# Check 2. break condition.
|
||||
if (norm(A, type = 'F') < tol) {
|
||||
break()
|
||||
}
|
||||
|
||||
# New directional derivative.
|
||||
loss.prime <- sum(G * Z)
|
||||
|
||||
# Reset beta if needed.
|
||||
if (loss.prime < 0) {
|
||||
# Compute `beta` as described in paper.
|
||||
beta.FR <- (norm(A, type = 'F') / norm(A.last, type = 'F'))^2
|
||||
beta.PR <- sum(A * (A - A.last)) / norm(A.last, type = 'F')^2
|
||||
if (beta.PR < -beta.FR) {
|
||||
beta <- -beta.FR
|
||||
} else if (abs(beta.PR) < beta.FR) {
|
||||
beta <- beta.PR
|
||||
} else if (beta.PR > beta.FR) {
|
||||
beta <- beta.FR
|
||||
} else {
|
||||
beta <- 0
|
||||
}
|
||||
} else {
|
||||
beta <- 0
|
||||
}
|
||||
|
||||
# Update direction.
|
||||
W <- -A + beta * W
|
||||
Z <- W %*% V
|
||||
}
|
||||
|
||||
# Check if current attempt improved previous ones
|
||||
if (loss < loss.best) {
|
||||
loss.best <- loss
|
||||
V.best <- V
|
||||
}
|
||||
}
|
||||
|
||||
return(list(
|
||||
loss = loss.best,
|
||||
V = V.best,
|
||||
B = null(V.best),
|
||||
h = h
|
||||
))
|
||||
}
|
|
@ -1,121 +0,0 @@
|
|||
#' Implementation of the CVE method as a Riemannian Conjugate Gradient method.
|
||||
#'
|
||||
#' @references A Riemannian Conjugate Gradient Algorithm with Implicit Vector
|
||||
#' Transport for Optimization on the Stiefel Manifold
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
cve_rmsprob <- function(X, Y, k,
|
||||
nObs = sqrt(nrow(X)),
|
||||
h = NULL,
|
||||
tau = 0.1,
|
||||
tol = 1e-4,
|
||||
rho = 0.1, # Momentum update.
|
||||
slack = 0,
|
||||
epochs = 50L,
|
||||
attempts = 10L,
|
||||
epsilon = 1e-7,
|
||||
max.linesearch.iter = 20L,
|
||||
logger = NULL
|
||||
) {
|
||||
# Set the environment of `grad` to this environment so that `grad` can
|
||||
# find, and also modify, the local variables defined below.
|
||||
environment(grad) <- environment()
|
||||
|
||||
# Get dimensions.
|
||||
n <- nrow(X) # Number of samples.
|
||||
p <- ncol(X) # Data dimensions
|
||||
q <- p - k # Complement dimension of the SDR space.
|
||||
|
||||
# Save initial learning rate `tau`.
|
||||
tau.init <- tau
|
||||
# Adapt tolerance for the break condition.
|
||||
tol <- sqrt(2 * q) * tol
|
||||
|
||||
# Estimate the bandwidth if not given.
|
||||
if (missing(h) || !is.numeric(h)) {
|
||||
h <- estimate.bandwidth(X, k, nObs)
|
||||
}
|
||||
|
||||
# Compute persistent data.
|
||||
# Compute lookup indexes for symmetry, lower/upper
|
||||
# triangular parts and vectorization.
|
||||
pair.index <- elem.pairs(seq(n))
|
||||
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
|
||||
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
|
||||
# Index of vectorized matrix, for lower and upper triangular part.
|
||||
lower <- ((i - 1) * n) + j
|
||||
upper <- ((j - 1) * n) + i
|
||||
|
||||
# Create all pairwise differences of rows of `X`.
|
||||
X_diff <- X[i, , drop = F] - X[j, , drop = F]
|
||||
# Identity matrix.
|
||||
I_p <- diag(1, p)
|
||||
|
||||
# Init tracking of the current best result (across multiple attempts).
|
||||
V.best <- NULL
|
||||
loss.best <- Inf
|
||||
|
||||
# Start loop for multiple attempts.
|
||||
for (attempt in 1:attempts) {
|
||||
# Sample a `(p, q)` dimensional matrix from the Stiefel manifold as
|
||||
# optimization start value.
|
||||
V <- rStiefl(p, q)
|
||||
|
||||
# Call logger with initial values before starting optimization.
|
||||
if (is.function(logger)) {
|
||||
loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
|
||||
epoch <- 0 # Set epoch count to 0 (only relevant for logging).
|
||||
error <- NA
|
||||
logger(environment())
|
||||
}
|
||||
|
||||
M <- matrix(0, p, q)
|
||||
## Start optimization loop.
|
||||
for (epoch in 1:epochs) {
|
||||
# Compute gradient and loss at current position.
|
||||
loss <- Inf
|
||||
G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
|
||||
# Projected gradient.
|
||||
A <- projTangentStiefl(V, G)
|
||||
# Projected element-wise squared gradient.
|
||||
Asq <- projTangentStiefl(V, G * G)
|
||||
# Momentum update.
|
||||
M <- (1 - rho) * Asq + rho * projTangentStiefl(V, M)
|
||||
# Parallel transport (on the Stiefel manifold) in the direction of `G`.
|
||||
V.tau <- retractStiefl(V - tau.init * A / (sqrt(abs(M)) + epsilon))
|
||||
|
||||
# Compute error.
|
||||
error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
|
||||
|
||||
# Perform step.
|
||||
V <- V.tau
|
||||
|
||||
# Call logger after taking a step.
|
||||
if (is.function(logger)) {
|
||||
# Set tau to a step-size estimate (only for logging).
|
||||
tau <- tau.init / mean(sqrt(abs(M)) + epsilon)
|
||||
logger(environment())
|
||||
}
|
||||
|
||||
# Check break condition.
|
||||
# Note: the division by `sqrt(2 * q)` is included in `tol`.
|
||||
if (error < tol) {
|
||||
break()
|
||||
}
|
||||
}
|
||||
|
||||
# Check if current attempt improved previous ones
|
||||
if (loss < loss.best) {
|
||||
loss.best <- loss
|
||||
V.best <- V
|
||||
}
|
||||
}
|
||||
|
||||
return(list(
|
||||
loss = loss.best,
|
||||
V = V.best,
|
||||
B = null(V.best),
|
||||
h = h
|
||||
))
|
||||
}
|
|
@ -1,129 +0,0 @@
|
|||
#' Simple implementation of the CVE method. 'Simple' means that this method is
|
||||
#' a classic GD method using no further tricks.
|
||||
#'
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
cve_sgd <- function(X, Y, k,
|
||||
nObs = sqrt(nrow(X)),
|
||||
h = NULL,
|
||||
tau = 0.01,
|
||||
tol = 1e-3,
|
||||
epochs = 50L,
|
||||
batch.size = 16L,
|
||||
attempts = 10L,
|
||||
logger = NULL
|
||||
) {
|
||||
# Set the environment of `grad` to this environment so that `grad` can
|
||||
# find, and also modify, the local variables defined below.
|
||||
environment(grad) <- environment()
|
||||
|
||||
# Get dimensions.
|
||||
n <- nrow(X) # Number of samples.
|
||||
p <- ncol(X) # Data dimensions
|
||||
q <- p - k # Complement dimension of the SDR space.
|
||||
|
||||
# Save initial learning rate `tau`.
|
||||
tau.init <- tau
|
||||
# Adapt tolerance for the break condition.
|
||||
tol <- sqrt(2 * q) * tol
|
||||
|
||||
# Estimate the bandwidth if not given.
|
||||
if (missing(h) || !is.numeric(h)) {
|
||||
h <- estimate.bandwidth(X, k, nObs)
|
||||
}
|
||||
|
||||
# Compute persistent data.
|
||||
# Compute lookup indexes for symmetry, lower/upper
|
||||
# triangular parts and vectorization.
|
||||
pair.index <- elem.pairs(seq(n))
|
||||
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
|
||||
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
|
||||
# Index of vectorized matrix, for lower and upper triangular part.
|
||||
lower <- ((i - 1) * n) + j
|
||||
upper <- ((j - 1) * n) + i
|
||||
|
||||
# Create all pairwise differences of rows of `X`.
|
||||
X_diff <- X[i, , drop = F] - X[j, , drop = F]
|
||||
# Identity matrix.
|
||||
I_p <- diag(1, p)
|
||||
# Init a list of data indices (shuffled for batching).
|
||||
indices <- seq(n)
|
||||
|
||||
# Init tracking of the current best result (across multiple attempts).
|
||||
V.best <- NULL
|
||||
loss.best <- Inf
|
||||
|
||||
# Start loop for multiple attempts.
|
||||
for (attempt in 1:attempts) {
|
||||
# Reset learning rate `tau`.
|
||||
tau <- tau.init
|
||||
|
||||
# Sample a `(p, q)` dimensional matrix from the Stiefel manifold as
|
||||
# optimization start value.
|
||||
V <- rStiefl(p, q)
|
||||
# Keep track of last `V` for computing error after an epoch.
|
||||
V.last <- V
|
||||
|
||||
if (is.function(logger)) {
|
||||
loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
|
||||
error <- NA
|
||||
epoch <- 0
|
||||
logger(environment())
|
||||
}
|
||||
|
||||
# Repeat `epochs` times
|
||||
for (epoch in 1:epochs) {
|
||||
# Shuffle batches
|
||||
batch.shuffle <- sample(indices)
|
||||
|
||||
# Make a step for each batch.
|
||||
for (batch.start in seq(1, n, batch.size)) {
|
||||
# Select batch data indices.
|
||||
batch.end <- min(batch.start + batch.size - 1, length(batch.shuffle))
|
||||
batch <- batch.shuffle[batch.start:batch.end]
|
||||
|
||||
# Compute batch gradient.
|
||||
loss <- NULL
|
||||
G <- grad(X[batch, ], Y[batch], V, h, loss.out = TRUE)
|
||||
|
||||
# Cayley transform matrix.
|
||||
A <- (G %*% t(V)) - (V %*% t(G))
|
||||
|
||||
# Apply learning rate `tau`.
|
||||
A.tau <- tau * A
|
||||
# Parallel transport (on the Stiefel manifold) in the direction of `G`.
|
||||
V <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)
|
||||
}
|
||||
# And the error for the history.
|
||||
error <- norm(V.last %*% t(V.last) - V %*% t(V), type = "F")
|
||||
V.last <- V
|
||||
|
||||
if (is.function(logger)) {
|
||||
# Compute loss at end of epoch for logging.
|
||||
loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
|
||||
logger(environment())
|
||||
}
|
||||
|
||||
# Check break condition.
|
||||
if (error < tol) {
|
||||
break()
|
||||
}
|
||||
}
|
||||
# Compute actual loss after finishing for comparing multiple attempts.
|
||||
loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
|
||||
|
||||
# After each attempt, check if last attempt reached a better result.
|
||||
if (loss < loss.best) {
|
||||
loss.best <- loss
|
||||
V.best <- V
|
||||
}
|
||||
}
|
||||
|
||||
return(list(
|
||||
loss = loss.best,
|
||||
V = V.best,
|
||||
B = null(V.best),
|
||||
h = h
|
||||
))
|
||||
}
|
|
@ -1,133 +0,0 @@
|
|||
#' Simple implementation of the CVE method. 'Simple' means that this method is
|
||||
#' a classic GD method using no further tricks.
|
||||
#'
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
cve_sgdrmsprob <- function(X, Y, k,
|
||||
nObs = sqrt(nrow(X)),
|
||||
h = NULL,
|
||||
tau = 0.1,
|
||||
tol = 1e-4,
|
||||
rho = 0.1,
|
||||
epochs = 50L,
|
||||
batch.size = 16L,
|
||||
attempts = 10L,
|
||||
epsilon = 1e-7,
|
||||
logger = NULL
|
||||
) {
|
||||
# Set the environment of `grad` to this environment so that `grad` can
|
||||
# find, and also modify, the local variables defined below.
|
||||
environment(grad) <- environment()
|
||||
|
||||
# Get dimensions.
|
||||
n <- nrow(X) # Number of samples.
|
||||
p <- ncol(X) # Data dimensions
|
||||
q <- p - k # Complement dimension of the SDR space.
|
||||
|
||||
# Save initial learning rate `tau`.
|
||||
tau.init <- tau
|
||||
# Adapt tolerance for the break condition.
|
||||
tol <- sqrt(2 * q) * tol
|
||||
|
||||
# Estimate the bandwidth if not given.
|
||||
if (missing(h) || !is.numeric(h)) {
|
||||
h <- estimate.bandwidth(X, k, nObs)
|
||||
}
|
||||
|
||||
# Compute persistent data.
|
||||
# Compute lookup indexes for symmetry, lower/upper
|
||||
# triangular parts and vectorization.
|
||||
pair.index <- elem.pairs(seq(n))
|
||||
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
|
||||
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
|
||||
# Index of vectorized matrix, for lower and upper triangular part.
|
||||
lower <- ((i - 1) * n) + j
|
||||
upper <- ((j - 1) * n) + i
|
||||
|
||||
# Create all pairwise differences of rows of `X`.
|
||||
X_diff <- X[i, , drop = F] - X[j, , drop = F]
|
||||
# Identity matrix.
|
||||
I_p <- diag(1, p)
|
||||
# Init a list of data indices (shuffled for batching).
|
||||
indices <- seq(n)
|
||||
|
||||
# Init tracking of the current best result (across multiple attempts).
|
||||
V.best <- NULL
|
||||
loss.best <- Inf
|
||||
|
||||
# Start loop for multiple attempts.
|
||||
for (attempt in 1:attempts) {
|
||||
# Reset learning rate `tau`.
|
||||
tau <- tau.init
|
||||
|
||||
# Sample a `(p, q)` dimensional matrix from the Stiefel manifold as
|
||||
# optimization start value.
|
||||
V <- rStiefl(p, q)
|
||||
# Keep track of last `V` for computing error after an epoch.
|
||||
V.last <- V
|
||||
|
||||
if (is.function(logger)) {
|
||||
loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
|
||||
error <- NA
|
||||
epoch <- 0
|
||||
logger(environment())
|
||||
}
|
||||
|
||||
M <- matrix(0, p, q)
|
||||
# Repeat `epochs` times
|
||||
for (epoch in 1:epochs) {
|
||||
# Shuffle batches
|
||||
batch.shuffle <- sample(indices)
|
||||
|
||||
# Make a step for each batch.
|
||||
for (batch.start in seq(1, n, batch.size)) {
|
||||
# Select batch data indices.
|
||||
batch.end <- min(batch.start + batch.size - 1, length(batch.shuffle))
|
||||
batch <- batch.shuffle[batch.start:batch.end]
|
||||
|
||||
# Compute batch gradient.
|
||||
loss <- NULL
|
||||
G <- grad(X[batch, ], Y[batch], V, h, loss.out = TRUE)
|
||||
|
||||
# Projected gradient.
|
||||
A <- projTangentStiefl(V, G)
|
||||
# Projected element-wise squared gradient.
|
||||
Asq <- projTangentStiefl(V, G * G)
|
||||
# Momentum update.
|
||||
M <- (1 - rho) * Asq + rho * projTangentStiefl(V, M)
|
||||
# Parallel transport (on the Stiefel manifold) in the direction of `G`.
|
||||
V <- retractStiefl(V - tau.init * A / (sqrt(abs(M)) + epsilon))
|
||||
}
|
||||
# And the error for the history.
|
||||
error <- norm(V.last %*% t(V.last) - V %*% t(V), type = "F")
|
||||
V.last <- V
|
||||
|
||||
if (is.function(logger)) {
|
||||
# Compute loss at end of epoch for logging.
|
||||
loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
|
||||
logger(environment())
|
||||
}
|
||||
|
||||
# Check break condition.
|
||||
if (error < tol) {
|
||||
break()
|
||||
}
|
||||
}
|
||||
# Compute actual loss after finishing for comparing multiple attempts.
|
||||
loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
|
||||
|
||||
# After each attempt, check if last attempt reached a better result.
|
||||
if (loss < loss.best) {
|
||||
loss.best <- loss
|
||||
V.best <- V
|
||||
}
|
||||
}
|
||||
|
||||
return(list(
|
||||
loss = loss.best,
|
||||
V = V.best,
|
||||
B = null(V.best),
|
||||
h = h
|
||||
))
|
||||
}
|
|
@ -1,139 +0,0 @@
|
|||
#' Simple implementation of the CVE method. 'Simple' means that this method is
|
||||
#' a classic GD method using no further tricks.
|
||||
#'
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
cve_simple <- function(X, Y, k,
|
||||
nObs = sqrt(nrow(X)),
|
||||
h = NULL,
|
||||
tau = 1.0,
|
||||
tol = 1e-3,
|
||||
slack = 0,
|
||||
epochs = 50L,
|
||||
attempts = 10L,
|
||||
logger = NULL
|
||||
) {
|
||||
# Set `grad` functions environment to enable if to find this environments
|
||||
# local variabels, needed to enable the manipulation of this local variables
|
||||
# from within `grad`.
|
||||
environment(grad) <- environment()
|
||||
|
||||
# Get dimensions.
|
||||
n <- nrow(X) # Number of samples.
|
||||
p <- ncol(X) # Data dimensions
|
||||
q <- p - k # Complement dimension of the SDR space.
|
||||
|
||||
# Save initial learning rate `tau`.
|
||||
tau.init <- tau
|
||||
# Addapt tolearance for break condition.
|
||||
tol <- sqrt(2 * q) * tol
|
||||
|
||||
# Estaimate bandwidth if not given.
|
||||
if (missing(h) || !is.numeric(h)) {
|
||||
h <- estimate.bandwidth(X, k, nObs)
|
||||
}
|
||||
|
||||
# Compute persistent data.
|
||||
# Compute lookup indexes for symmetrie, lower/upper
|
||||
# triangular parts and vectorization.
|
||||
pair.index <- elem.pairs(seq(n))
|
||||
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
|
||||
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
|
||||
# Index of vectorized matrix, for lower and upper triangular part.
|
||||
lower <- ((i - 1) * n) + j
|
||||
upper <- ((j - 1) * n) + i
|
||||
|
||||
# Create all pairewise differences of rows of `X`.
|
||||
X_diff <- X[i, , drop = F] - X[j, , drop = F]
|
||||
# Identity matrix.
|
||||
I_p <- diag(1, p)
|
||||
|
||||
# Init tracking of current best (according multiple attempts).
|
||||
V.best <- NULL
|
||||
loss.best <- Inf
|
||||
|
||||
# Start loop for multiple attempts.
|
||||
for (attempt in 1:attempts) {
|
||||
# Reset learning rate `tau`.
|
||||
tau <- tau.init
|
||||
|
||||
# Sample a `(p, q)` dimensional matrix from the stiefel manifold as
|
||||
# optimization start value.
|
||||
V <- rStiefl(p, q)
|
||||
|
||||
# Initial loss and gradient.
|
||||
loss <- Inf
|
||||
G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
|
||||
# Set last loss (aka, loss after applying the step).
|
||||
loss.last <- loss
|
||||
|
||||
# Cayley transform matrix `A`
|
||||
A <- (G %*% t(V)) - (V %*% t(G))
|
||||
|
||||
# Call logger with initial values before starting optimization.
|
||||
if (is.function(logger)) {
|
||||
logger(0L, attempt, loss, V, tau)
|
||||
}
|
||||
|
||||
## Start optimization loop.
|
||||
for (epoch in 1:epochs) {
|
||||
# Apply learning rate `tau`.
|
||||
A.tau <- tau * A
|
||||
# Parallet transport (on Stiefl manifold) into direction of `G`.
|
||||
V.tau <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)
|
||||
|
||||
# Loss at position after a step.
|
||||
loss <- grad(X, Y, V.tau, h, loss.only = TRUE, persistent = TRUE)
|
||||
|
||||
# Check if step is appropriate, iff not reduce learning rate.
|
||||
if ((loss - loss.last) > slack * loss.last) {
|
||||
tau <- tau / 2
|
||||
next() # Keep position and try with smaller `tau`.
|
||||
}
|
||||
|
||||
# Compute error.
|
||||
error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
|
||||
|
||||
# Check break condition (epoch check to skip ignored gradient calc).
|
||||
# Note: the devision by `sqrt(2 * k)` is included in `tol`.
|
||||
if (error < tol || epoch >= epochs) {
|
||||
# take last step and stop optimization.
|
||||
V <- V.tau
|
||||
# Call logger last time befor stoping.
|
||||
if (is.function(logger)) {
|
||||
logger(epoch, attempt, loss, V, tau)
|
||||
}
|
||||
break()
|
||||
}
|
||||
|
||||
# Perform the step and remember previous loss.
|
||||
V <- V.tau
|
||||
loss.last <- loss
|
||||
|
||||
# Call logger after taking a step.
|
||||
if (is.function(logger)) {
|
||||
logger(epoch, attempt, loss, V, tau)
|
||||
}
|
||||
|
||||
# Compute gradient at new position.
|
||||
G <- grad(X, Y, V, h, persistent = TRUE)
|
||||
|
||||
# Cayley transform matrix `A`
|
||||
A <- (G %*% t(V)) - (V %*% t(G))
|
||||
}
|
||||
|
||||
# Check if current attempt improved previous ones
|
||||
if (loss < loss.best) {
|
||||
loss.best <- loss
|
||||
V.best <- V
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return(list(
|
||||
loss = loss.best,
|
||||
V = V.best,
|
||||
B = null(V.best),
|
||||
h = h
|
||||
))
|
||||
}
|
|
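The Cayley step used above maps the Stiefel manifold to itself exactly; a standalone check (a sketch, independent of the package):

p <- 6; q <- 2; tau <- 0.1
V <- qr.Q(qr(matrix(rnorm(p * q), p, q)))    # point on the manifold
G <- matrix(rnorm(p * q), p, q)              # arbitrary "gradient"
A <- G %*% t(V) - V %*% t(G)                 # skew-symmetric by construction
V.tau <- solve(diag(p) + tau * A) %*% ((diag(p) - tau * A) %*% V)
stopifnot(all.equal(t(V.tau) %*% V.tau, diag(q)))  # still semi-orthogonal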
@ -1,109 +0,0 @@
#' Generates test datasets.
#'
#' Provides sample datasets. There are 5 different datasets named
#' M1, M2, M3, M4 and M5 described in the paper referenced below.
#' The general model is given by:
#' \deqn{Y ~ g(B'X) + \epsilon}
#'
#' @param name One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"} or \code{"M5"}.
#' @param n number of samples.
#' @param B optional dimension-reduction matrix; if missing, a dataset-specific
#'  default with \code{p = 12} rows is used.
#' @param p.mix Only for \code{"M4"}, see below.
#' @param lambda Only for \code{"M4"}, see below.
#'
#' @return List with elements
#' \itemize{
#'      \item{X}{data}
#'      \item{Y}{response}
#'      \item{B}{used dimension-reduction matrix}
#'      \item{name}{name of the dataset (name parameter)}
#' }
#'
#' @section M1:
#' The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace
#' dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points.
#' The link function \eqn{g} is given as
#' \deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + 0.5\epsilon}{g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + 0.5 epsilon}
#' @section M2:
#' \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a default of \eqn{n = 200} data points.
#' The link function \eqn{g} is given as
#' \deqn{g(x) = x_1 x_2^2 + 0.5\epsilon}{g(x) = x_1 x_2^2 + 0.5 epsilon}
#' @section M3:
#' TODO:
#' @section M4:
#' TODO:
#' @section M5:
#' TODO:
#'
#' @import stats
#' @importFrom stats rnorm rbinom
#' @export
dataset <- function(name = "M1", n, B, p.mix = 0.3, lambda = 1.0) {
    # validate parameters
    stopifnot(name %in% c("M1", "M2", "M3", "M4", "M5"))

    # set default values if not supplied
    if (missing(n)) {
        n <- if (name %in% c("M1", "M2")) 200 else if (name != "M5") 100 else 42
    }
    if (missing(B)) {
        p <- 12
        if (name == "M1") {
            B <- cbind(
                c( 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0),
                c( 1,-1, 1,-1, 1,-1, 0, 0, 0, 0, 0, 0)
            ) / sqrt(6)
        } else if (name == "M2") {
            B <- cbind(
                c(c(1, 0), rep(0, 10)),
                c(c(0, 1), rep(0, 10))
            )
        } else {
            B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, 6)), 12, 1)
        }
    } else {
        p <- dim(B)[1]
        # validate column count to match the dataset's `k = dim(B)[2]`
        stopifnot(
            (name %in% c("M1", "M2") && dim(B)[2] == 2) ||
            (name %in% c("M3", "M4", "M5") && dim(B)[2] == 1)
        )
    }

    # set link function `g` for the model `Y ~ g(B'X) + epsilon`
    if (name == "M1") {
        g <- function(BX) { BX[1] / (0.5 + (BX[2] + 1.5)^2) }
    } else if (name == "M2") {
        g <- function(BX) { BX[1] * BX[2]^2 }
    } else if (name %in% c("M3", "M4")) {
        g <- function(BX) { cos(BX[1]) }
    } else { # name == "M5"
        g <- function(BX) { 2 * log(abs(BX[1]) + 1) }
    }

    # compute X
    if (name != "M4") {
        # compute a root of the covariance matrix according to the dataset
        if (name %in% c("M1", "M3")) {
            # Variance-covariance structure for `X ~ N_p(0, \Sigma)` with
            # `\Sigma_{i, j} = 0.5^{|i - j|}`.
            Sigma <- matrix(0.5^abs(kronecker(1:p, 1:p, '-')), p, p)
            # decompose Sigma into Sigma.root^T Sigma.root = Sigma for use
            # in the creation of `X`
            Sigma.root <- chol(Sigma)
        } else { # name %in% c("M2", "M5")
            Sigma.root <- diag(rep(1, p)) # p-dim identity
        }
        # data `X` as a multivariate random normal variable with
        # covariance matrix `Sigma`.
        X <- replicate(p, rnorm(n, 0, 1)) %*% Sigma.root
    } else { # name == "M4"
        X <- t(replicate(n, rep((1 - 2 * rbinom(1, 1, p.mix)) * lambda, p) + rnorm(p, 0, 1)))
    }

    # response `Y ~ g(B'X) + epsilon` with `epsilon ~ N(0, 0.5^2)`
    Y <- apply(X, 1, function(X_i) {
        g(t(B) %*% X_i) + rnorm(1, 0, 0.5)
    })

    return(list(X = X, Y = Y, B = B, name = name))
}
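A quick sanity check of the generator above (a sketch; the dimensions follow from the defaults for "M2"):

ds <- dataset("M2", n = 100)
stopifnot(
    nrow(ds$X) == 100, ncol(ds$X) == 12,
    length(ds$Y) == 100,
    ncol(ds$B) == 2
)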
@ -1,27 +0,0 @@
#' Estimated bandwidth for CVE.
#'
#' Estimates a proper bandwidth \code{h} according to
#' \deqn{%
#' h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{%
#' h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p}
#'
#' @param X data matrix of dimension (n x p) with n data points X_i of
#'  dimension p. Therefore each row represents a data point of dimension p.
#' @param k Guess for rank(B).
#' @param nObs Either numeric or a function. If specified as a numeric value
#'  it is used in the computation of the bandwidth directly. If it is a
#'  function, `nObs` is evaluated as \code{nObs(nrow(x))}. The default
#'  behaviour if not supplied at all is to use \code{nObs <- nrow(x)^0.5}.
#'
#' @seealso [\code{\link{qchisq}}]
#' @export
estimate.bandwidth <- function(X, k, nObs) {
    n <- nrow(X)
    p <- ncol(X)

    # Evaluate `nObs` if given as a function (as documented above).
    if (is.function(nObs)) {
        nObs <- nObs(n)
    }

    X_centered <- scale(X, center = TRUE, scale = FALSE)
    Sigma <- (1 / n) * t(X_centered) %*% X_centered

    # Note: `k` equals the `p - q` degrees of freedom in the formula above.
    quantil <- qchisq((nObs - 1) / (n - 1), k)
    return(2 * quantil * sum(diag(Sigma)) / p)
}
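For a feel of the scale of the estimate (a sketch; with standard normal predictors the trace term is close to p, so h reduces to roughly twice the chi-squared quantile):

set.seed(42)
X <- matrix(rnorm(200 * 12), 200, 12)
h <- estimate.bandwidth(X, k = 2, nObs = sqrt(nrow(X)))
h # approximately 2 * qchisq((sqrt(200) - 1) / 199, 2)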
@ -1,82 +0,0 @@
#' Compute the gradient of `L(V)` given a dataset `X`.
#'
#' @param X Data matrix.
#' @param Y Response.
#' @param V Position to compute the gradient at, aka point on the Stiefel
#'  manifold.
#' @param h Bandwidth.
#' @param loss.out If \code{TRUE} the loss will be written to the parent
#'  environment.
#' @param loss.only Boolean to only compute the loss, if \code{TRUE} a single
#'  loss value is returned and \code{envir} is ignored.
#' @param persistent Determines if data indices and dependent calculations
#'  shall be reused from the parent environment. ATTENTION: Do NOT set this
#'  flag, it is only intended for internal usage by carefully aligned
#'  functions!
#' @keywords internal
#' @export
grad <- function(X, Y, V, h,
                 loss.out = FALSE,
                 loss.only = FALSE,
                 persistent = FALSE) {
    # Get number of samples and dimension.
    n <- nrow(X)
    p <- ncol(X)

    if (!persistent) {
        # Compute lookup indexes for symmetry, lower/upper
        # triangular parts and vectorization.
        pair.index <- elem.pairs(seq(n))
        i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
        j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
        # Index of vectorized matrix, for lower and upper triangular part.
        lower <- ((i - 1) * n) + j
        upper <- ((j - 1) * n) + i

        # Create all pairwise differences of rows of `X`.
        X_diff <- X[i, , drop = FALSE] - X[j, , drop = FALSE]
    }

    # Projection matrix onto the orthogonal complement of `span(V)`.
    Q <- diag(1, p) - tcrossprod(V, V)

    # Vectorized distance matrix `D`.
    vecD <- colSums(tcrossprod(Q, X_diff)^2)

    # Create kernel matrix (aka apply the kernel to the distances).
    K <- matrix(1, n, n)                 # `exp(0) == 1`
    K[lower] <- exp((-0.5 / h) * vecD^2) # Set lower triangular part.
    K[upper] <- t(K)[upper]              # Mirror lower triangle to upper.

    # Weighted `Y` moments.
    colSumsK <- colSums(K)
    y1 <- (K %*% Y) / colSumsK
    y2 <- (K %*% Y^2) / colSumsK

    # Per-example loss `L(V, X_i)`.
    L <- y2 - y1^2
    if (loss.only) {
        return(mean(L))
    }
    if (loss.out) {
        loss <<- mean(L)
    }

    # Compute scaling vector `vecS` for `X_diff`.
    tmp <- kronecker(matrix(y1, n, 1), matrix(Y, 1, n), `-`)^2
    tmp <- as.vector(L) - tmp
    tmp <- tmp * K / colSumsK
    vecS <- (tmp + t(tmp))[lower] * vecD

    # The gradient.
    # 1. `crossprod(A, B)` is equivalent to `t(A) %*% B`.
    # 2. `(X_diff %*% V) * vecS` is first a matrix-matrix multiplication and
    #    then uses recycling to scale each row `i` by `vecS[i]`.
    #    Note that `vecS` is a vector and that `R` uses column-major ordering
    #    of matrices.
    # (See: notes for more details)
    # TODO: Depending on n, p, q decide which version to take (for current
    # datasets "inner" is faster, see: notes).
    #   inner = crossprod(X_diff, X_diff * vecS) %*% V,
    #   outer = crossprod(X_diff, (X_diff %*% V) * vecS)
    G <- crossprod(X_diff, X_diff * vecS) %*% V
    G <- (-2 / (n * h^2)) * G
    return(G)
}
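The "inner" and "outer" formulations noted in the TODO above are algebraically identical, since row-scaling of `X_diff` commutes with the multiplication by `V`; a standalone check (a sketch with random inputs):

n.pairs <- 100; p <- 6; q <- 4
X_diff <- matrix(rnorm(n.pairs * p), n.pairs, p)
vecS <- rnorm(n.pairs)
V <- qr.Q(qr(matrix(rnorm(p * q), p, q)))
inner <- crossprod(X_diff, X_diff * vecS) %*% V
outer <- crossprod(X_diff, (X_diff %*% V) * vecS)
stopifnot(all.equal(inner, outer))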
@ -1,43 +0,0 @@

#' Performs a grid search for parameters over a parameter grid.
#' @examples
#' ds <- dataset()
#' args <- list(
#'     h = c(0.05, 0.1, 0.2),
#'     method = c("simple", "sgd"),
#'     tau = c(0.5, 0.1, 0.01)
#' )
#' cve.grid.search(ds$X, ds$Y, ncol(ds$B), args)
#' @export
cve.grid.search <- function(X, Y, k, args) {

    args$stringsAsFactors <- FALSE
    args$KEEP.OUT.ATTRS <- FALSE
    grid <- do.call(expand.grid, args)
    grid.length <- length(grid[[1]])

    print(grid)

    for (i in 1:grid.length) {
        arguments <- as.list(grid[i, ])
        # Set required arguments.
        arguments$X <- X
        arguments$Y <- Y
        arguments$k <- k
        # print(arguments)
        dr <- do.call(cve.call, arguments)
        print(dr$loss)
    }
}

# ds <- dataset()
# X <- ds$X
# Y <- ds$Y
# (k <- ncol(ds$B))
# args <- list(
#     h = c(0.05, 0.1, 0.2),
#     method = c("simple", "sgd"),
#     tau = c(0.5, 0.1, 0.01),
#     attempts = c(1L)
# )

# cve.grid.search(X, Y, k, args)
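For reference, `expand.grid` crosses all parameter values, so the example grid above yields 3 * 2 * 3 = 18 configurations (a sketch):

args <- list(h = c(0.05, 0.1, 0.2),
             method = c("simple", "sgd"),
             tau = c(0.5, 0.1, 0.01))
grid <- do.call(expand.grid, c(args, stringsAsFactors = FALSE))
nrow(grid) # 18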
@ -1,82 +0,0 @@
#' Samples uniformly from the Stiefel manifold.
#'
#' @param p row dimension.
#' @param q column dimension.
#' @return `(p, q)` semi-orthogonal matrix.
#' @examples
#' V <- rStiefl(6, 4)
#' @export
rStiefl <- function(p, q) {
    return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))))
}

#' Retraction to the manifold.
#'
#' @param A matrix.
#' @return `(p, q)` semi-orthogonal matrix, aka an element of the Stiefel
#'  manifold.
#' @keywords internal
#' @export
retractStiefl <- function(A) {
    return(qr.Q(qr(A)))
}

#' Skew-symmetric matrix computed from `A` as
#' \eqn{1/2 (A - A^T)}.
#' @param A Matrix of dim `(p, p)`
#' @return Skew-symmetric matrix of dim `(p, p)`.
#' @keywords internal
#' @export
skew <- function(A) {
    0.5 * (A - t(A))
}

#' Symmetric matrix computed from `A` as
#' \eqn{1/2 (A + A^T)}.
#' @param A Matrix of dim `(p, p)`
#' @return Symmetric matrix of dim `(p, p)`.
#' @keywords internal
#' @export
sym <- function(A) {
    0.5 * (A + t(A))
}

#' Orthogonal projection onto the tangent space of the Stiefel manifold.
#'
#' @param V Point on the Stiefel manifold.
#' @param G matrix to be projected onto the tangent space at `V`.
#' @return `(p, q)` matrix as an element of the tangent space at `V`.
#' @keywords internal
#' @export
projTangentStiefl <- function(V, G) {
    Q <- diag(1, nrow(V)) - V %*% t(V)
    return(Q %*% G + V %*% skew(t(V) %*% G))
}

#' Null space basis of a given matrix `V`
#'
#' @param V `(p, q)` matrix
#' @return Semi-orthogonal `(p, p - q)` matrix spanning the orthogonal
#'  complement of `span(V)`.
#' @keywords internal
#' @export
null <- function(V) {
    tmp <- qr(V)
    set <- if (tmp$rank == 0L) seq_len(ncol(V)) else -seq_len(tmp$rank)
    return(qr.Q(tmp, complete = TRUE)[, set, drop = FALSE])
}

#' Creates a (numeric) matrix where each column contains
#' an element-to-element matching.
#' @param elements numeric vector of elements to match
#' @return matrix of size `(2, n * (n - 1) / 2)` for an argument of length `n`.
#' @keywords internal
#' @examples
#' elem.pairs(seq.int(2, 5))
#' @export
elem.pairs <- function(elements) {
    # Number of elements to match.
    n <- length(elements)
    # Create all combinations.
    pairs <- rbind(rep(elements, each = n), rep(elements, n))
    # Select unique combinations without self interaction.
    return(pairs[, pairs[1, ] < pairs[2, ]])
}
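A standalone sanity check for `rStiefl` and `null` above (a sketch): the returned complement basis is semi-orthogonal and orthogonal to `span(V)`.

V <- rStiefl(6, 4)
B <- null(V)
stopifnot(
    all.equal(crossprod(B), diag(2)),           # semi-orthogonal
    all.equal(crossprod(V, B), matrix(0, 4, 2)) # orthogonal to span(V)
)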
@ -1,2 +0,0 @@
runtime_test    Runtime comparison of CVE against MAVE for the M1 - M5 datasets.
logging         Example of a logger function for cve algorithm analysis.
@ -1,43 +0,0 @@
library(CVEpureR)

# Setup histories.
(epochs <- 50)
(attempts <- 10)
loss.history <- matrix(NA, epochs + 1, attempts)
error.history <- matrix(NA, epochs + 1, attempts)
tau.history <- matrix(NA, epochs + 1, attempts)
true.error.history <- matrix(NA, epochs + 1, attempts)

# Create a dataset.
ds <- dataset("M1")
X <- ds$X
Y <- ds$Y
B <- ds$B # the true `B`
(k <- ncol(ds$B))

# True projection matrix.
P <- B %*% solve(t(B) %*% B) %*% t(B)
# Define the logger for the `cve()` method.
logger <- function(env) {
    # Note the `<<-` assignment!
    loss.history[env$epoch + 1, env$attempt] <<- env$loss
    error.history[env$epoch + 1, env$attempt] <<- env$error
    tau.history[env$epoch + 1, env$attempt] <<- env$tau
    # Compute the true error by comparing to the true `B`.
    B.est <- null(env$V) # Function provided by CVE.
    P.est <- B.est %*% solve(t(B.est) %*% B.est) %*% t(B.est)
    true.error <- norm(P - P.est, 'F') / sqrt(2 * k)
    true.error.history[env$epoch + 1, env$attempt] <<- true.error
}
# Perform SDR for ONE `k`.
dr <- cve(Y ~ X, k = k, logger = logger, epochs = epochs, attempts = attempts)
# Plot histories.
par(mfrow = c(2, 2))
matplot(loss.history, type = 'l', log = 'y', xlab = 'iter',
        main = 'loss', ylab = expression(L(V[iter])))
matplot(error.history, type = 'l', log = 'y', xlab = 'iter',
        main = 'error', ylab = 'error')
matplot(tau.history, type = 'l', log = 'y', xlab = 'iter',
        main = 'tau', ylab = 'tau')
matplot(true.error.history, type = 'l', log = 'y', xlab = 'iter',
        main = 'true error', ylab = 'true error')
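Once the run finishes, the recorded histories also allow a quick per-attempt summary (a sketch; `na.rm` skips epochs cut short by the break condition):

apply(loss.history, 2, min, na.rm = TRUE)       # best loss per attempt
apply(true.error.history, 2, min, na.rm = TRUE) # best true error per attempt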
@ -1,89 +0,0 @@
# Usage:
# ~$ Rscript runtime_test.R

library(CVEpureR) # load CVE

#' Writes progress to console.
tell.user <- function(name, start.time, i, length) {
    cat("\rRunning Test (", name, "):",
        i, "/", length,
        " - elapsed:", format(Sys.time() - start.time), "\033[K")
}
subspace.dist <- function(B1, B2) {
    P1 <- B1 %*% solve(t(B1) %*% B1) %*% t(B1)
    P2 <- B2 %*% solve(t(B2) %*% B2) %*% t(B2)
    return(norm(P1 - P2, type = 'F'))
}

# Number of simulations.
SIM.NR <- 50
# Maximal number of iterations in the curvilinear search algorithm.
MAXIT <- 50
# Number of arbitrary starting values for curvilinear optimization.
ATTEMPTS <- 10
# Set names of datasets.
dataset.names <- c("M1", "M2", "M3", "M4", "M5")
# Set used CVE method.
methods <- c("simple") # c("legacy", "simple", "sgd", "linesearch")

# Setup error and time tracking variables.
error <- matrix(NA, SIM.NR, length(methods) * length(dataset.names))
time <- matrix(NA, SIM.NR, ncol(error))
colnames(error) <- kronecker(paste0(dataset.names, '-'), methods, paste0)
colnames(time) <- colnames(error)

# Only for telling the user (to stdout).
count <- 0
start.time <- Sys.time()
# Start simulation loop.
for (sim in 1:SIM.NR) {
    # Repeat for each dataset.
    for (name in dataset.names) {
        count <- count + 1
        tell.user(name, start.time, count, SIM.NR * length(dataset.names))

        # Create a new dataset.
        ds <- dataset(name)
        # Prepare X, Y and combine them to a data matrix.
        Y <- ds$Y
        X <- ds$X
        data <- cbind(Y, X)
        # Get dimensions.
        dim <- ncol(X)
        truedim <- ncol(ds$B)

        for (method in methods) {
            dr.time <- system.time(
                dr <- cve.call(X, Y,
                               method = method,
                               k = truedim,
                               attempts = ATTEMPTS
                )
            )
            dr <- dr[[truedim]]

            key <- paste0(name, '-', method)
            error[sim, key] <- subspace.dist(dr$B, ds$B) / sqrt(2 * truedim)
            time[sim, key] <- dr.time["elapsed"]
        }
    }
}

cat("\n\n## Time [sec] Means:\n")
print(colMeans(time))
cat("\n## Error Means:\n")
print(colMeans(error))

at <- seq(ncol(error)) + rep(seq(ncol(error) / length(methods)) - 1, each = length(methods))
boxplot(error,
        main = paste0("Error (Nr of simulations ", SIM.NR, ")"),
        ylab = "Error",
        las = 2,
        at = at
)
boxplot(time,
        main = paste0("Time (Nr of simulations ", SIM.NR, ")"),
        ylab = "Time [sec]",
        las = 2,
        at = at
)
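For reference, the `at` positions computed above interleave the method boxes per dataset and leave a one-slot gap between dataset groups; with the single method and five datasets used here this evaluates to (a sketch):

methods <- c("simple")
datasets <- c("M1", "M2", "M3", "M4", "M5")
ncols <- length(methods) * length(datasets)
at <- seq(ncols) + rep(seq(ncols / length(methods)) - 1, each = length(methods))
at # 1 3 5 7 9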
Binary file not shown.
@ -1,20 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/CVE.R
\docType{package}
\name{CVEpureR-package}
\alias{CVEpureR}
\alias{CVEpureR-package}
\title{Conditional Variance Estimator (CVE)}
\description{
Conditional Variance Estimator for Sufficient Dimension
Reduction
}
\details{
TODO: And some details
}
\references{
Fertl Lukas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
}
\author{
Loki
}
@ -1,71 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/CVE.R
\name{cve}
\alias{cve}
\alias{cve.call}
\title{Implementation of the CVE method.}
\usage{
cve(formula, data, method = "simple", max.dim = 10, ...)

cve.call(X, Y, method = "simple", nObs = nrow(X)^0.5, min.dim = 1,
  max.dim = 10, k, ...)
}
\arguments{
\item{formula}{Formula for the regression model defining `X`, `Y`.
See: \code{\link{formula}}.}

\item{data}{data.frame holding the data for the formula.}

\item{method}{The methods only differ in the optimization used.
All of them are gradient-based optimizations on a Stiefel manifold.
\itemize{
    \item "simple" Simple reduction of the step size.
    \item "sgd" stochastic gradient descent.
    \item TODO: further
}}

\item{X}{Data matrix.}

\item{Y}{Responses.}

\item{nObs}{As described in the paper.}

\item{k}{Guess for the SDR dimension.}

\item{...}{Further, method-specific parameters.}
}
\description{
Conditional Variance Estimator (CVE) is a novel sufficient dimension
reduction (SDR) method assuming a model
\deqn{Y \sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon}
where B'X is a lower dimensional projection of the predictors.
}
\examples{
library(CVE)

# sample dataset
ds <- dataset("M5")

# call `cve` with default method (aka "simple")
dr.simple <- cve(ds$Y ~ ds$X, k = ncol(ds$B))
# plot optimization history (loss via iteration)
plot(dr.simple, main = "CVE M5 simple")

# call `cve` with method "linesearch" using a `data.frame` as data.
data <- data.frame(Y = ds$Y, X = ds$X)
# Note: `Y, X` are NOT defined, they are extracted from `data`.
dr.linesearch <- cve(Y ~ ., data, method = "linesearch", k = ncol(ds$B))
plot(dr.linesearch, main = "CVE M5 linesearch")

}
\references{
Fertl L., Bura E. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
}
\seealso{
\code{\link{formula}}. For a complete parameters list (dependent on
the method) see \code{\link{cve_simple}}, \code{\link{cve_sgd}}
}
@ -1,19 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/gridSearch.R
\name{cve.grid.search}
\alias{cve.grid.search}
\title{Performs a grid search for parameters over a parameter grid.}
\usage{
cve.grid.search(X, Y, k, args)
}
\description{
Performs a grid search for parameters over a parameter grid.
}
\examples{
ds <- dataset()
args <- list(
    h = c(0.05, 0.1, 0.2),
    method = c("simple", "sgd"),
    tau = c(0.5, 0.1, 0.01)
)
cve.grid.search(ds$X, ds$Y, ncol(ds$B), args)
}
@ -1,16 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cve_linesearch.R
\name{cve_linesearch}
\alias{cve_linesearch}
\title{Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe
conditions.}
\usage{
cve_linesearch(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 1,
  tol = 0.001, rho1 = 0.1, rho2 = 0.9, slack = 0, epochs = 50L,
  attempts = 10L, max.linesearch.iter = 10L, logger = NULL)
}
\description{
Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe
conditions.
}
\keyword{internal}
@ -1,16 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cve_sgd.R
\name{cve_sgd}
\alias{cve_sgd}
\title{Implementation of the CVE method using stochastic gradient descent
(batch-wise optimization).}
\usage{
cve_sgd(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 0.01,
  tol = 0.001, epochs = 50L, batch.size = 16L, attempts = 10L,
  logger = NULL)
}
\description{
Implementation of the CVE method using stochastic gradient descent
(batch-wise optimization).
}
\keyword{internal}
@ -1,16 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cve_simple.R
\name{cve_simple}
\alias{cve_simple}
\title{Simple implementation of the CVE method. 'Simple' means that this method is
a classic GD method using no further tricks.}
\usage{
cve_simple(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 1,
  tol = 0.001, slack = 0, epochs = 50L, attempts = 10L,
  logger = NULL)
}
\description{
Simple implementation of the CVE method. 'Simple' means that this method is
a classic GD method using no further tricks.
}
\keyword{internal}
@ -1,64 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/datasets.R
\name{dataset}
\alias{dataset}
\title{Generates test datasets.}
\usage{
dataset(name = "M1", n, B, p.mix = 0.3, lambda = 1)
}
\arguments{
\item{name}{One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"} or \code{"M5"}.}

\item{n}{number of samples.}

\item{B}{optional dimension-reduction matrix; if missing, a dataset-specific
default with \code{p = 12} rows is used.}

\item{p.mix}{Only for \code{"M4"}, see below.}

\item{lambda}{Only for \code{"M4"}, see below.}
}
\value{
List with elements
\itemize{
    \item{X}{data}
    \item{Y}{response}
    \item{B}{used dimension-reduction matrix}
    \item{name}{name of the dataset (name parameter)}
}
}
\description{
Provides sample datasets. There are 5 different datasets named
M1, M2, M3, M4 and M5 described in the paper referenced below.
The general model is given by:
\deqn{Y ~ g(B'X) + \epsilon}
}
\section{M1}{

The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace
dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points.
The link function \eqn{g} is given as
\deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + 0.5\epsilon}{g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + 0.5 epsilon}
}

\section{M2}{

\eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a default of \eqn{n = 200} data points.
The link function \eqn{g} is given as
\deqn{g(x) = x_1 x_2^2 + 0.5\epsilon}{g(x) = x_1 x_2^2 + 0.5 epsilon}
}

\section{M3}{

TODO:
}

\section{M4}{

TODO:
}

\section{M5}{

TODO:
}
@ -1,23 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/util.R
\name{elem.pairs}
\alias{elem.pairs}
\title{Creates a (numeric) matrix where each column contains
an element-to-element matching.}
\usage{
elem.pairs(elements)
}
\arguments{
\item{elements}{numeric vector of elements to match}
}
\value{
matrix of size `(2, n * (n - 1) / 2)` for an argument of length `n`.
}
\description{
Creates a (numeric) matrix where each column contains
an element-to-element matching.
}
\examples{
elem.pairs(seq.int(2, 5))
}
\keyword{internal}
@ -1,28 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/estimateBandwidth.R
\name{estimate.bandwidth}
\alias{estimate.bandwidth}
\title{Estimated bandwidth for CVE.}
\usage{
estimate.bandwidth(X, k, nObs)
}
\arguments{
\item{X}{data matrix of dimension (n x p) with n data points X_i of
dimension p. Therefore each row represents a data point of dimension p.}

\item{k}{Guess for rank(B).}

\item{nObs}{Either numeric or a function. If specified as a numeric value
it is used in the computation of the bandwidth directly. If it is a
function, `nObs` is evaluated as \code{nObs(nrow(x))}. The default
behaviour if not supplied at all is to use \code{nObs <- nrow(x)^0.5}.}
}
\description{
Estimates a proper bandwidth \code{h} according to
\deqn{%
h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{%
h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p}
}
\seealso{
[\code{\link{qchisq}}]
}
@ -1,31 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/gradient.R
\name{grad}
\alias{grad}
\title{Compute the gradient of `L(V)` given a dataset `X`.}
\usage{
grad(X, Y, V, h, loss.out = FALSE, loss.only = FALSE,
  persistent = FALSE)
}
\arguments{
\item{X}{Data matrix.}

\item{Y}{Response.}

\item{V}{Position to compute the gradient at, aka point on the Stiefel manifold.}

\item{h}{Bandwidth.}

\item{loss.out}{If \code{TRUE} the loss will be written to the parent environment.}

\item{loss.only}{Boolean to only compute the loss, if \code{TRUE} a single
loss value is returned and \code{envir} is ignored.}

\item{persistent}{Determines if data indices and dependent calculations shall
be reused from the parent environment. ATTENTION: Do NOT set this flag, it is
only intended for internal usage by carefully aligned functions!}
}
\description{
Compute the gradient of `L(V)` given a dataset `X`.
}
\keyword{internal}
@ -1,18 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/util.R
\name{null}
\alias{null}
\title{Null space basis of a given matrix `V`}
\usage{
null(V)
}
\arguments{
\item{V}{`(p, q)` matrix}
}
\value{
Semi-orthogonal `(p, p - q)` matrix spanning the orthogonal complement of
`span(V)`.
}
\description{
Null space basis of a given matrix `V`
}
\keyword{internal}
@ -1,28 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/CVE.R
\name{plot.cve}
\alias{plot.cve}
\title{Plotting helper for objects of class \code{cve}.}
\usage{
## S3 method for class 'cve'
plot(x, content = "history", ...)
}
\arguments{
\item{x}{Object of class \code{cve} (result of [cve()]).}

\item{...}{Pass-through parameters to [plot()] and [lines()].}

\item{content}{Specifies what to plot:
\itemize{
    \item "history" Plots the loss history from the Stiefel optimization
        (default).
    \item ... TODO: add (if there are any)
}}
}
\description{
Plotting helper for objects of class \code{cve}.
}
\seealso{
see \code{\link{par}} for graphical parameters to pass through
as well as \code{\link{plot}} for the standard plot utility.
}
@ -1,22 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/util.R
\name{rStiefl}
\alias{rStiefl}
\title{Samples uniformly from the Stiefel manifold}
\usage{
rStiefl(p, q)
}
\arguments{
\item{p}{row dimension.}

\item{q}{column dimension.}
}
\value{
`(p, q)` semi-orthogonal matrix
}
\description{
Samples uniformly from the Stiefel manifold
}
\examples{
V <- rStiefl(6, 4)
}
@ -1,14 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/CVE.R
\name{summary.cve}
\alias{summary.cve}
\title{Prints a summary of a \code{cve} result.}
\usage{
\method{summary}{cve}(object, ...)
}
\arguments{
\item{object}{Instance of class \code{cve} as returned by \code{cve}.}
}
\description{
Prints a summary of a \code{cve} result.
}
@ -1,479 +0,0 @@
library(microbenchmark)

dyn.load("benchmark.so")


## rowSum* .Call --------------------------------------------------------------
rowSums.c <- function(M) {
    stopifnot(
        is.matrix(M),
        is.numeric(M)
    )
    if (!is.double(M)) {
        M <- matrix(as.double(M), nrow = nrow(M))
    }

    .Call('R_rowSums', PACKAGE = 'benchmark', M)
}
rowSumsV2.c <- function(M) {
    stopifnot(
        is.matrix(M),
        is.numeric(M)
    )
    if (!is.double(M)) {
        M <- matrix(as.double(M), nrow = nrow(M))
    }

    .Call('R_rowSumsV2', PACKAGE = 'benchmark', M)
}
rowSumsV3.c <- function(M) {
    stopifnot(
        is.matrix(M),
        is.numeric(M)
    )
    if (!is.double(M)) {
        M <- matrix(as.double(M), nrow = nrow(M))
    }

    .Call('R_rowSumsV3', PACKAGE = 'benchmark', M)
}
colSums.c <- function(M) {
    stopifnot(
        is.matrix(M),
        is.numeric(M)
    )
    if (!is.double(M)) {
        M <- matrix(as.double(M), nrow = nrow(M))
    }

    .Call('R_colSums', PACKAGE = 'benchmark', M)
}
rowSquareSums.c <- function(M) {
    stopifnot(
        is.matrix(M),
        is.numeric(M)
    )
    if (!is.double(M)) {
        M <- matrix(as.double(M), nrow = nrow(M))
    }

    .Call('R_rowSquareSums', PACKAGE = 'benchmark', M)
}
rowSumsSymVec.c <- function(vecA, nrow, diag = 0.0) {
    stopifnot(
        is.vector(vecA),
        is.numeric(vecA),
        is.numeric(diag),
        nrow * (nrow - 1) == length(vecA) * 2
    )
    if (!is.double(vecA)) {
        vecA <- as.double(vecA)
    }
    .Call('R_rowSumsSymVec', PACKAGE = 'benchmark',
          vecA, as.integer(nrow), as.double(diag))
}
rowSweep.c <- function(A, v, op = '-') {
    stopifnot(
        is.matrix(A),
        is.numeric(v)
    )
    if (!is.double(A)) {
        A <- matrix(as.double(A), nrow = nrow(A))
    }
    if (!is.vector(v) || !is.double(v)) {
        v <- as.double(v)
    }
    stopifnot(
        nrow(A) == length(v),
        op %in% c('+', '-', '*', '/')
    )

    .Call('R_rowSweep', PACKAGE = 'benchmark', A, v, op)
}

## row*, col* tests ------------------------------------------------------------
n <- 3000
M <- matrix(runif(n * 12), n, 12)
stopifnot(
    all.equal(rowSums(M^2), rowSums.c(M^2)),
    all.equal(colSums(M), colSums.c(M)),
    all.equal(rowSums(M), rowSumsV2.c(M)),
    all.equal(rowSums(M), rowSumsV3.c(M))
)
microbenchmark(
    rowSums     = rowSums(M),
    rowSums.c   = rowSums.c(M),
    rowSumsV2.c = rowSumsV2.c(M),
    rowSumsV3.c = rowSumsV3.c(M)
)
microbenchmark(
    rowSums     = rowSums(M^2),
    rowSums.c   = rowSums.c(M^2),
    rowSqSums.c = rowSquareSums.c(M)
)
microbenchmark(
    colSums   = colSums(M),
    colSums.c = colSums.c(M)
)

sum <- rowSums(M)
stopifnot(all.equal(
    sweep(M, 1, sum, FUN = `/`),
    rowSweep.c(M, sum, '/') # Row-normalize
), all.equal(
    sweep(M, 1, sum, FUN = `/`),
    M / sum
))
microbenchmark(
    sweep = sweep(M, 1, sum, FUN = `/`),
    M / sum,
    rowSweep.c = rowSweep.c(M, sum, '/') # Row-normalize
)

# Create symmetric matrix with constant diagonal entries.
nrow <- 200
diag <- 1.0
Sym <- tcrossprod(runif(nrow))
diag(Sym) <- diag
# Get vectorized lower triangular part of the `Sym` matrix.
SymVec <- Sym[lower.tri(Sym)]
stopifnot(all.equal(
    rowSums(Sym),
    rowSumsSymVec.c(SymVec, nrow, diag)
))
microbenchmark(
    rowSums         = rowSums(Sym),
    rowSums.c       = rowSums.c(Sym),
    rowSumsSymVec.c = rowSumsSymVec.c(SymVec, nrow, diag)
)

## Matrix-Matrix operation .Call ----------------------------------------------
transpose.c <- function(A) {
    stopifnot(
        is.matrix(A), is.numeric(A)
    )
    if (!is.double(A)) {
        A <- matrix(as.double(A), nrow(A), ncol(A))
    }

    .Call('R_transpose', PACKAGE = 'benchmark', A)
}

sympMV.c <- function(vecA, x) {
    stopifnot(
        is.vector(vecA), is.numeric(vecA),
        is.vector(x), is.numeric(x),
        length(x) * (length(x) + 1) == 2 * length(vecA)
    )
    if (!is.double(vecA)) {
        vecA <- as.double(vecA)
    }
    if (!is.double(x)) {
        x <- as.double(x)
    }

    .Call('R_sympMV', PACKAGE = 'benchmark', vecA, x)
}

matrixprod.c <- function(A, B) {
    stopifnot(
        is.matrix(A), is.numeric(A),
        is.matrix(B), is.numeric(B),
        ncol(A) == nrow(B)
    )
    if (!is.double(A)) {
        A <- matrix(as.double(A), nrow = nrow(A))
    }
    if (!is.double(B)) {
        B <- matrix(as.double(B), nrow = nrow(B))
    }

    .Call('R_matrixprod', PACKAGE = 'benchmark', A, B)
}
crossprod.c <- function(A, B) {
    stopifnot(
        is.matrix(A), is.numeric(A),
        is.matrix(B), is.numeric(B),
        nrow(A) == nrow(B)
    )
    if (!is.double(A)) {
        A <- matrix(as.double(A), nrow = nrow(A))
    }
    if (!is.double(B)) {
        B <- matrix(as.double(B), nrow = nrow(B))
    }

    .Call('R_crossprod', PACKAGE = 'benchmark', A, B)
}
kronecker.c <- function(A, B, op = '*') {
    stopifnot(
        is.matrix(A), is.numeric(A),
        is.matrix(B), is.numeric(B),
        is.character(op), op %in% c('*', '+', '/', '-')
    )
    if (!is.double(A)) {
        A <- matrix(as.double(A), nrow = nrow(A))
    }
    if (!is.double(B)) {
        B <- matrix(as.double(B), nrow = nrow(B))
    }

    .Call('R_kronecker', PACKAGE = 'benchmark', A, B, op)
}
skewSymRank2k.c <- function(A, B, alpha = 1, beta = 0) {
    stopifnot(
        is.matrix(A), is.numeric(A),
        is.matrix(B), is.numeric(B),
        all(dim(A) == dim(B)),
        is.numeric(alpha), length(alpha) == 1L,
        is.numeric(beta), length(beta) == 1L
    )
    if (!is.double(A)) {
        A <- matrix(as.double(A), nrow = nrow(A))
    }
    if (!is.double(B)) {
        B <- matrix(as.double(B), nrow = nrow(B))
    }

    .Call('R_skewSymRank2k', PACKAGE = 'benchmark', A, B,
          as.double(alpha), as.double(beta))
}

## Matrix-Matrix operation tests ----------------------------------------------
n <- 200
k <- 100
m <- 300

A <- matrix(runif(n * k), n, k)
B <- matrix(runif(k * m), k, m)
stopifnot(
    all.equal(t(A), transpose.c(A))
)
microbenchmark(
    t(A),
    transpose.c(A)
)

Sym <- tcrossprod(runif(n))
vecSym <- Sym[lower.tri(Sym, diag = TRUE)]
x <- runif(n)
stopifnot(all.equal(
    as.double(Sym %*% x),
    sympMV.c(vecSym, x)
))
microbenchmark(
    Sym %*% x,
    sympMV.c = sympMV.c(vecSym, x)
)
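# Side note on the packed storage used by sympMV.c above (a sketch): the
# vector holds the lower triangle, diagonal included, in column-major
# order, which is what BLAS dspmv expects and what lower.tri extraction
# yields.
S <- tcrossprod(1:3)                  # 3 x 3 symmetric matrix
vecS.packed <- S[lower.tri(S, diag = TRUE)]
stopifnot(length(vecS.packed) == 3 * (3 + 1) / 2)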
stopifnot(
    all.equal(A %*% B, matrixprod.c(A, B))
)
microbenchmark(
    "%*%"        = A %*% B,
    matrixprod.c = matrixprod.c(A, B)
)

A <- matrix(runif(k * n), k, n)
B <- matrix(runif(k * m), k, m)
stopifnot(
    all.equal(crossprod(A, B), crossprod.c(A, B))
)
microbenchmark(
    crossprod   = crossprod(A, B),
    crossprod.c = crossprod.c(A, B)
)

n <- 100L
m <- 12L
p <- 11L
q <- 10L
A <- matrix(runif(n * m), n, m)
B <- matrix(runif(p * q), p, q)

stopifnot(all.equal(
    kronecker(A, B),
    kronecker.c(A, B)
))
microbenchmark(
    kronecker   = kronecker(A, B),
    kronecker.c = kronecker.c(A, B)
)

n <- 12
k <- 11
A <- matrix(runif(n * k), n, k)
B <- matrix(runif(n * k), n, k)
stopifnot(all.equal(
    A %*% t(B) - B %*% t(A), skewSymRank2k.c(A, B)
))
microbenchmark(
    A %*% t(B) - B %*% t(A),
    skewSymRank2k.c(A, B)
)

## Orthogonal projection onto null space .Call --------------------------------
nullProj.c <- function(B) {
    stopifnot(
        is.matrix(B), is.numeric(B)
    )
    if (!is.double(B)) {
        B <- matrix(as.double(B), nrow = nrow(B))
    }

    .Call('R_nullProj', PACKAGE = 'benchmark', B)
}
## Orthogonal projection onto null space tests --------------------------------
p <- 12
q <- 10
V <- qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q)))

# Projection matrix onto the orthogonal complement of `span(V)`.
Q <- diag(1, p) - tcrossprod(V, V)
stopifnot(
    all.equal(Q, nullProj.c(V))
)
microbenchmark(
    nullProj   = diag(1, p) - tcrossprod(V, V),
    nullProj.c = nullProj.c(V)
)

# ## Kronecker optimizations ----------------------------------------------------
# library(microbenchmark)

# dist.1 <- function(X_diff, Q) {
#     rowSums((X_diff %*% Q)^2)
# }
# dist.2 <- function(X, Q) {
#     ones <- rep(1, nrow(X))
#     proj <- X %*% Q
#     rowSums((kronecker(proj, ones) - kronecker(ones, proj))^2)
# }

# n <- 400L
# p <- 12L
# k <- 2L
# q <- p - k

# X <- matrix(rnorm(n * p), n, p)
# Q <- diag(1, p) - tcrossprod(rnorm(p))
# ones <- rep(1, n)
# X_diff <- kronecker(X, ones) - kronecker(ones, X)

# stopifnot(all.equal(dist.1(X_diff, Q), dist.2(X, Q)))

# microbenchmark(
#     dist.1(X_diff, Q),
#     dist.2(X, Q),
#     times = 10L
# )
# # if (!persistent) {
# #     pair.index <- elem.pairs(seq(n))
# #     i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
# #     j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
# #     lower <- ((i - 1) * n) + j
# #     upper <- ((j - 1) * n) + i
# #     X_diff <- X[i, , drop = FALSE] - X[j, , drop = FALSE]
# # }

# # # Projection matrix onto the orthogonal complement of `span(V)`
# # Q <- diag(1, p) - tcrossprod(V, V)
# # # Vectorized distance matrix `D`.
# # vecD <- rowSums((X_diff %*% Q)^2)



# ## WIP for gradient. ----------------------------------------------------------

grad.c <- function(X, X_diff, Y, V, h) {
    stopifnot(
        is.matrix(X), is.double(X),
        is.matrix(X_diff), is.double(X_diff),
        ncol(X_diff) == ncol(X), nrow(X_diff) == nrow(X) * (nrow(X) - 1) / 2,
        is.vector(Y) || (is.matrix(Y) && min(dim(Y)) == 1L), is.double(Y),
        length(Y) == nrow(X),
        is.matrix(V), is.double(V),
        nrow(V) == ncol(X),
        is.vector(h), is.numeric(h), length(h) == 1
    )

    .Call('R_grad', PACKAGE = 'benchmark',
          X, X_diff, as.double(Y), V, as.double(h))
}

elem.pairs <- function(elements) {
    # Number of elements to match.
    n <- length(elements)
    # Create all combinations.
    pairs <- rbind(rep(elements, each = n), rep(elements, n))
    # Select unique combinations without self interaction.
    return(pairs[, pairs[1, ] < pairs[2, ]])
}
grad <- function(X, Y, V, h, persistent = TRUE) {
    n <- nrow(X)
    p <- ncol(X)

    if (!persistent) {
        pair.index <- elem.pairs(seq(n))
        i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
        j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
        lower <- ((i - 1) * n) + j
        upper <- ((j - 1) * n) + i
        X_diff <- X[i, , drop = FALSE] - X[j, , drop = FALSE]
    }

    # Projection matrix onto the orthogonal complement of `span(V)`.
    Q <- diag(1, p) - tcrossprod(V, V)
    # Vectorized distance matrix `D`.
    vecD <- rowSums((X_diff %*% Q)^2)

    # Create kernel matrix (aka apply the kernel to the distances).
    K <- matrix(1, n, n)                 # `exp(0) == 1`
    K[lower] <- exp((-0.5 / h) * vecD^2) # Set lower triangular part.
    K[upper] <- t(K)[upper]              # Mirror lower triangle to upper.

    # Weighted `Y` moments.
    colSumsK <- colSums(K)
    y1 <- (K %*% Y) / colSumsK
    y2 <- (K %*% Y^2) / colSumsK
    # Per-example loss `L(V, X_i)`.
    L <- y2 - y1^2

    # Compute scaling vector `vecS` for `X_diff`.
    tmp <- kronecker(matrix(y1, n, 1), matrix(Y, 1, n), `-`)^2
    tmp <- as.vector(L) - tmp
    tmp <- tmp * K / colSumsK
    vecS <- (tmp + t(tmp))[lower] * vecD

    G <- crossprod(X_diff, X_diff * vecS) %*% V
    G <- (-2 / (n * h^2)) * G
    return(G)
}
rStiefel <- function(p, q) {
    return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))))
}

n <- 200
p <- 12
q <- 10

X <- matrix(runif(n * p), n, p)
Y <- runif(n)
V <- rStiefel(p, q)
h <- 0.1

pair.index <- elem.pairs(seq(n))
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
lower <- ((i - 1) * n) + j
upper <- ((j - 1) * n) + i
X_diff <- X[i, , drop = FALSE] - X[j, , drop = FALSE]

stopifnot(all.equal(
    grad(X, Y, V, h),
    grad.c(X, X_diff, Y, V, h)
))
microbenchmark(
    grad   = grad(X, Y, V, h),
    grad.c = grad.c(X, X_diff, Y, V, h)
)
@ -1,510 +0,0 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h> // for `mem*` functions.
|
||||
|
||||
#include <R_ext/BLAS.h>
|
||||
#include <R_ext/Lapack.h>
|
||||
#include <R_ext/Error.h>
|
||||
// #include <Rmath.h>
|
||||
|
||||
#include "benchmark.h"
|
||||
|
||||
void rowSums(const double *A,
|
||||
const int nrow, const int ncol,
|
||||
double *sum) {
|
||||
int i, j, block_size, block_size_i;
|
||||
const double *A_block = A;
|
||||
const double *A_end = A + nrow * ncol;
|
||||
|
||||
if (nrow > 508) {
|
||||
block_size = 508;
|
||||
} else {
|
||||
block_size = nrow;
|
||||
}
|
||||
|
||||
// Iterate `(block_size_i, ncol)` submatrix blocks.
|
||||
for (i = 0; i < nrow; i += block_size_i) {
|
||||
// Reset `A` to new block beginning.
|
||||
A = A_block;
|
||||
// Take block size of eveything left and reduce to max size.
|
||||
block_size_i = nrow - i;
|
||||
if (block_size_i > block_size) {
|
||||
block_size_i = block_size;
|
||||
}
|
||||
// Copy blocks first column.
|
||||
for (j = 0; j < block_size_i; j += 4) {
|
||||
sum[j] = A[j];
|
||||
sum[j + 1] = A[j + 1];
|
||||
sum[j + 2] = A[j + 2];
|
||||
sum[j + 3] = A[j + 3];
|
||||
}
|
||||
for (; j < block_size_i; ++j) {
|
||||
sum[j] = A[j];
|
||||
}
|
||||
// Sum following columns to the first one.
|
||||
for (A += nrow; A < A_end; A += nrow) {
|
||||
for (j = 0; j < block_size_i; j += 4) {
|
||||
sum[j] += A[j];
|
||||
sum[j + 1] += A[j + 1];
|
||||
sum[j + 2] += A[j + 2];
|
||||
sum[j + 3] += A[j + 3];
|
||||
}
|
||||
for (; j < block_size_i; ++j) {
|
||||
sum[j] += A[j];
|
||||
}
|
||||
}
|
||||
// Step one block forth.
|
||||
A_block += block_size_i;
|
||||
sum += block_size_i;
|
||||
}
|
||||
}
|
||||
|
||||
void rowSumsV2(const double *A,
|
||||
const int nrow, const int ncol,
|
||||
double *sum) {
|
||||
int i, j, block_size, block_size_i;
|
||||
const double *A_block = A;
|
||||
const double *A_end = A + nrow * ncol;
|
||||
|
||||
if (nrow > CVE_MEM_CHUNK_SIZE) {
|
||||
block_size = CVE_MEM_CHUNK_SIZE;
|
||||
} else {
|
||||
block_size = nrow;
|
||||
}
|
||||
|
||||
// Iterate `(block_size_i, ncol)` submatrix blocks.
|
||||
for (i = 0; i < nrow; i += block_size_i) {
|
||||
// Reset `A` to new block beginning.
|
||||
A = A_block;
|
||||
// Take block size of eveything left and reduce to max size.
|
||||
block_size_i = nrow - i;
|
||||
if (block_size_i > block_size) {
|
||||
block_size_i = block_size;
|
||||
}
|
||||
// Compute first blocks column,
|
||||
for (j = 0; j < block_size_i; ++j) {
|
||||
sum[j] = A[j];
|
||||
}
|
||||
// and sum the following columns to the first one.
|
||||
for (A += nrow; A < A_end; A += nrow) {
|
||||
for (j = 0; j < block_size_i; ++j) {
|
||||
sum[j] += A[j];
|
||||
}
|
||||
}
|
||||
// Step one block forth.
|
||||
A_block += block_size_i;
|
||||
sum += block_size_i;
|
||||
}
|
||||
}
|
||||
void rowSumsV3(const double *A,
|
||||
const int nrow, const int ncol,
|
||||
double *sum) {
|
||||
int i, onei = 1;
|
||||
double* ones = (double*)malloc(ncol * sizeof(double));
|
||||
const double one = 1.0;
|
||||
const double zero = 0.0;
|
||||
|
||||
for (i = 0; i < ncol; ++i) {
|
||||
ones[i] = 1.0;
|
||||
}
|
||||
|
||||
matrixprod(A, nrow, ncol, ones, ncol, 1, sum);
|
||||
free(ones);
|
||||
}
|
||||
|
||||
void colSums(const double *A, const int nrow, const int ncol,
|
||||
double *sums) {
|
||||
int i, j, nrowb = 4 * (nrow / 4); // 4 dividable nrow block, biggest 4*k <= nrow.
|
||||
double sum;
|
||||
|
||||
for (j = 0; j < ncol; ++j) {
|
||||
sum = 0.0;
|
||||
for (i = 0; i < nrowb; i += 4) {
|
||||
sum += A[i]
|
||||
+ A[i + 1]
|
||||
+ A[i + 2]
|
||||
+ A[i + 3];
|
||||
}
|
||||
for (; i < nrow; ++i) {
|
||||
sum += A[i];
|
||||
}
|
||||
*(sums++) = sum;
|
||||
A += nrow;
|
||||
}
|
||||
}
|
||||
|
||||
void rowSquareSums(const double *A,
|
||||
const int nrow, const int ncol,
|
||||
double *sum) {
|
||||
int i, j, block_size, block_size_i;
|
||||
const double *A_block = A;
|
||||
const double *A_end = A + nrow * ncol;
|
||||
|
||||
if (nrow > 508) {
|
||||
block_size = 508;
|
||||
} else {
|
||||
block_size = nrow;
|
||||
}
|
||||
|
||||
// Iterate `(block_size_i, ncol)` submatrix blocks.
|
||||
for (i = 0; i < nrow; i += block_size_i) {
|
||||
// Reset `A` to new block beginning.
|
||||
A = A_block;
|
||||
// Take block size of eveything left and reduce to max size.
|
||||
block_size_i = nrow - i;
|
||||
if (block_size_i > block_size) { // TODO: contains BUG!!! floor last one !!!
|
||||
block_size_i = block_size;
|
||||
} /// ...
|
||||
// TODO:
|
||||
// Copy blocks first column.
|
||||
for (j = 0; j < block_size_i; j += 4) {
|
||||
sum[j] = A[j] * A[j];
|
||||
sum[j + 1] = A[j + 1] * A[j + 1];
|
||||
sum[j + 2] = A[j + 2] * A[j + 2];
|
||||
sum[j + 3] = A[j + 3] * A[j + 3];
|
||||
}
|
||||
for (; j < block_size_i; ++j) {
|
||||
sum[j] = A[j] * A[j];
|
||||
}
|
||||
// Sum following columns to the first one.
|
||||
for (A += nrow; A < A_end; A += nrow) {
|
||||
for (j = 0; j < block_size_i; j += 4) {
|
||||
sum[j] += A[j] * A[j];
|
||||
sum[j + 1] += A[j + 1] * A[j + 1];
|
||||
sum[j + 2] += A[j + 2] * A[j + 2];
|
||||
sum[j + 3] += A[j + 3] * A[j + 3];
|
||||
}
|
||||
for (; j < block_size_i; ++j) {
|
||||
sum[j] += A[j] * A[j];
|
||||
}
|
||||
}
|
||||
// Step one block forth.
|
||||
A_block += block_size_i;
|
||||
sum += block_size_i;
|
||||
}
|
||||
}
|
||||
|
||||
void rowSumsSymVec(const double *Avec, const int nrow,
|
||||
const double diag,
|
||||
double *sum) {
|
||||
int i, j;
|
||||
|
||||
if (diag == 0.0) {
|
||||
memset(sum, 0, nrow * sizeof(double));
|
||||
} else {
|
||||
for (i = 0; i < nrow; ++i) {
|
||||
sum[i] = diag;
|
||||
}
|
||||
}
|
||||
|
||||
for (j = 0; j < nrow; ++j) {
|
||||
for (i = j + 1; i < nrow; ++i, ++Avec) {
|
||||
sum[j] += *Avec;
|
||||
sum[i] += *Avec;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#define ROW_SWEEP_ALG(op) \
|
||||
/* Iterate `(block_size_i, ncol)` submatrix blocks. */ \
|
||||
for (i = 0; i < nrow; i += block_size_i) { \
|
||||
/* Set `A` and `C` to block beginning. */ \
|
||||
A = A_block; \
|
||||
C = C_block; \
|
||||
/* Get current block's row size. */ \
|
||||
block_size_i = nrow - i; \
|
||||
if (block_size_i > block_size) { \
|
||||
block_size_i = block_size; \
|
||||
} \
|
||||
/* Perform element wise operation for block. */ \
|
||||
for (; A < A_end; A += nrow, C += nrow) { \
|
||||
for (j = 0; j < block_size_i; ++j) { \
|
||||
C[j] = (A[j]) op (v[j]); \
|
||||
} \
|
||||
} \
|
||||
/* Step one block forth. */ \
|
||||
A_block += block_size_i; \
|
||||
C_block += block_size_i; \
|
||||
v += block_size_i; \
|
||||
}
|
||||
|
||||
/* C[, j] = A[, j] * v for each j = 1 to ncol */
|
||||
void rowSweep(const double *A, const int nrow, const int ncol,
|
||||
const char* op,
|
||||
const double *v, // vector of length nrow
|
||||
double *C) {
|
||||
int i, j, block_size, block_size_i;
|
||||
const double *A_block = A;
|
||||
double *C_block = C;
|
||||
const double *A_end = A + nrow * ncol;
|
||||
|
||||
if (nrow > CVE_MEM_CHUNK_SMALL) { // small because 3 vectors in cache
|
||||
block_size = CVE_MEM_CHUNK_SMALL;
|
||||
} else {
|
||||
block_size = nrow;
|
||||
}
|
||||
|
||||
if (*op == '+') {
|
||||
ROW_SWEEP_ALG(+)
|
||||
} else if (*op == '-') {
|
||||
ROW_SWEEP_ALG(-)
|
||||
} else if (*op == '*') {
|
||||
ROW_SWEEP_ALG(*)
|
||||
} else if (*op == '/') {
|
||||
ROW_SWEEP_ALG(/)
|
||||
} else {
|
||||
error("Got unknown 'op' (opperation) argument.");
|
||||
}
|
||||
}
|
||||
|
||||
void transpose(const double *A, const int nrow, const int ncol, double* T) {
|
||||
int i, j, len = nrow * ncol;
|
||||
|
||||
// Filling column-wise and accessing row-wise.
|
||||
for (i = 0, j = 0; i < len; ++i, j += nrow) {
|
||||
if (j >= len) {
|
||||
j -= len - 1;
|
||||
}
|
||||
T[i] = A[j];
|
||||
}
|
||||
}
|
||||
|
||||
// Symmetric Packed matrix vector product.
|
||||
// Computes
|
||||
// y <- Ax
|
||||
// where A is supplied as packed lower triangular part of a symmetric
|
||||
// matrix A. Meaning that `vecA` is `vec_ltri(A)`.
|
||||
void sympMV(const double* vecA, const int nrow, const double* x, double* y) {
|
||||
double one = 1.0;
|
||||
double zero = 0.0;
|
||||
int onei = 1;
|
||||
|
||||
F77_NAME(dspmv)("L", &nrow, &one, vecA, x, &onei, &zero, y, &onei);
|
||||
}

void matrixprod(const double *A, const int nrowA, const int ncolA,
                const double *B, const int nrowB, const int ncolB,
                double *C) {
    const double one = 1.0;
    const double zero = 0.0;

    // DGEMM with parameterization:
    //      C <- A %*% B
    F77_NAME(dgemm)("N", "N", &nrowA, &ncolB, &ncolA,
                    &one, A, &nrowA, B, &nrowB,
                    &zero, C, &nrowA);
}

void crossprod(const double *A, const int nrowA, const int ncolA,
               const double *B, const int nrowB, const int ncolB,
               double *C) {
    const double one = 1.0;
    const double zero = 0.0;

    // DGEMM with parameterization:
    //      C <- t(A) %*% B
    F77_NAME(dgemm)("T", "N", &ncolA, &ncolB, &nrowA,
                    &one, A, &nrowA, B, &nrowB,
                    &zero, C, &ncolA);
}
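
// Both wrappers are thin DGEMM calls matching R's semantics; e.g. for
// A and B of dimension n x p, crossprod(A, n, p, B, n, p, C) fills the
// p x p matrix C = t(A) %*% B, just like R's crossprod(A, B).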

#define KRONECKER_ALG(op)                                                  \
    for (j = 0; j < ncolA; ++j) {                                          \
        for (l = 0; l < ncolB; ++l) {                                      \
            colB = B + (l * nrowB);                                        \
            for (i = 0; i < nrowA; ++i) {                                  \
                for (k = 0; k < nrowB; ++k) {                              \
                    *(C++) = (A[i]) op (colB[k]);                          \
                }                                                          \
            }                                                              \
        }                                                                  \
        A += nrowA;                                                        \
    }

// Element-wise generalized Kronecker product; with op = '*' this is the
// usual Kronecker product C = A %x% B.
void kronecker(const double * restrict A, const int nrowA, const int ncolA,
               const double * restrict B, const int nrowB, const int ncolB,
               const char* op,
               double * restrict C) {
    int i, j, k, l;
    const double *colB;

    if (*op == '+') {
        KRONECKER_ALG(+)
    } else if (*op == '-') {
        KRONECKER_ALG(-)
    } else if (*op == '*') {
        KRONECKER_ALG(*)
    } else if (*op == '/') {
        KRONECKER_ALG(/)
    } else {
        error("Got unknown 'op' (operation) argument.");
    }
}
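
// Example (illustrative): for 2 x 2 matrices A and B with op = '*', the
// result is the 4 x 4 block matrix
//      C = [[a11 * B, a12 * B],
//           [a21 * B, a22 * B]].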

// Computes the projection matrix onto the orthogonal complement of
// span(B), i.e. Q = I - B %*% t(B) for (column-)orthonormal B.
void nullProj(const double *B, const int nrowB, const int ncolB,
              double *Q) {
    const double minusOne = -1.0;
    const double one = 1.0;

    // Initialize Q as identity matrix.
    memset(Q, 0, sizeof(double) * nrowB * nrowB);
    double *Q_diag, *Q_end = Q + nrowB * nrowB;
    for (Q_diag = Q; Q_diag < Q_end; Q_diag += nrowB + 1) {
        *Q_diag = 1.0;
    }

    // DGEMM with parameterization:
    //      Q <- (-1.0 * B %*% t(B)) + Q
    F77_NAME(dgemm)("N", "T", &nrowB, &nrowB, &ncolB,
                    &minusOne, B, &nrowB, B, &nrowB,
                    &one, Q, &nrowB);
}
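
// For orthonormal B (t(B) %*% B = I) the result is idempotent and
// annihilates B:
//      Q %*% Q = I - 2 B t(B) + B (t(B) B) t(B) = Q,    Q %*% B = 0.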

// Enumerates all index pairs (i, j) with from <= i < j < to, writing them
// column-wise into the 2 x (n * (n - 1) / 2) matrix `pairs`, n = to - from.
void rangePairs(const int from, const int to, int *pairs) {
    int i, j;
    for (i = from; i < to; ++i) {
        for (j = i + 1; j < to; ++j) {
            pairs[0] = i;
            pairs[1] = j;
            pairs += 2;
        }
    }
}
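
// Example (illustrative): rangePairs(0, 4, pairs) fills `pairs` with the
// 6 pairs (0,1), (0,2), (0,3), (1,2), (1,3), (2,3), in this order.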

// A dense skew-symmetric rank 2k update.
// Performs the update
//      C := alpha (A %*% t(B) - B %*% t(A)) + beta C
void skewSymRank2k(const int nrow, const int ncol,
                   double alpha, const double *A, const double *B,
                   double beta,
                   double *C) {
    F77_NAME(dgemm)("N", "T",
                    &nrow, &nrow, &ncol,
                    &alpha, A, &nrow, B, &nrow,
                    &beta, C, &nrow);
    alpha *= -1.0;
    beta = 1.0;
    F77_NAME(dgemm)("N", "T",
                    &nrow, &nrow, &ncol,
                    &alpha, B, &nrow, A, &nrow,
                    &beta, C, &nrow);
}
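
// Since t(A %*% t(B) - B %*% t(A)) = -(A %*% t(B) - B %*% t(A)), the
// update preserves skew-symmetry of C whenever beta * C is
// skew-symmetric (e.g. beta = 0).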

// TODO: clarify
static inline double gaussKernel(const double x, const double scale) {
    return exp(scale * x * x);
}
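
// As called from grad() below with scale = -0.5 / h and x = vecD[i], this
// evaluates the (unnormalized) Gaussian weight exp(-vecD[i]^2 / (2 h)).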

// TODO: much potential for optimization!!!
static void weightedYandLoss(const int n,
                             const double *Y,
                             const double *vecD,
                             const double *vecW,
                             const double *colSums,
                             double *y1, double *L, double *vecS,
                             double *loss) {
    int i, j, k, N = n * (n - 1) / 2;
    double l;

    for (i = 0; i < n; ++i) {
        y1[i] = Y[i];
        L[i] = Y[i] * Y[i];
    }

    for (k = j = 0; j < n; ++j) {
        for (i = j + 1; i < n; ++i, ++k) {
            y1[i] += Y[j] * vecW[k];
            y1[j] += Y[i] * vecW[k];
            L[i] += Y[j] * Y[j] * vecW[k];
            L[j] += Y[i] * Y[i] * vecW[k];
        }
    }

    for (i = 0; i < n; ++i) {
        y1[i] /= colSums[i];
        L[i] /= colSums[i];
    }

    l = 0.0;
    for (i = 0; i < n; ++i) {
        l += (L[i] -= y1[i] * y1[i]);
    }
    *loss = l / (double)n;

    for (k = j = 0; j < n; ++j) {
        for (i = j + 1; i < n; ++i, ++k) {
            l = Y[j] - y1[i];
            vecS[k] = (L[i] - (l * l)) / colSums[i];
            l = Y[i] - y1[j];
            vecS[k] += (L[j] - (l * l)) / colSums[j];
        }
    }

    for (k = 0; k < N; ++k) {
        vecS[k] *= vecW[k] * vecD[k];
    }
}
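
// In matrix notation, with W the symmetric weight matrix built from
// `vecW` (unit diagonal) and colSums_i = sum_j W_ij, the above computes
//      y1_i = sum_j W_ij Y_j / colSums_i              (weighted 1st moment)
//      L_i  = sum_j W_ij Y_j^2 / colSums_i - y1_i^2   (weighted variance)
//      loss = (1 / n) sum_i L_i
// and the per-pair scaling factors
//      vecS_k = W_k D_k [ (L_i - (Y_j - y1_i)^2) / colSums_i
//                       + (L_j - (Y_i - y1_j)^2) / colSums_j ].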

void grad(const int n, const int p, const int q,
          const double *X,
          const double *X_diff,
          const double *Y,
          const double *V,
          const double h,
          double *G, double *loss) {
    // Number of non-trivial pairs X_i, X_j.
    int i, N = (n * (n - 1)) / 2;
    double scale = -0.5 / h;

    if (X_diff == (void*)0) {
        // TODO: ...
    }

    // Allocate and compute projection matrix `Q = I_p - V %*% t(V)`.
    double *Q = (double*)malloc(p * p * sizeof(double));
    nullProj(V, p, q, Q);

    // Allocate and compute the vectorized distance matrix with a temporary
    // projection of `X_diff`.
    double *vecD = (double*)malloc(N * sizeof(double));
    double *X_proj;
    if (p < 5) { // TODO: refine that!
        X_proj = (double*)malloc(N * 5 * sizeof(double));
    } else {
        X_proj = (double*)malloc(N * p * sizeof(double));
    }
    matrixprod(X_diff, N, p, Q, p, p, X_proj);
    rowSquareSums(X_proj, N, p, vecD);

    // Apply kernel to the distance vector for the weights computation.
    double *vecK = X_proj; // reuse memory area, no longer needed.
    for (i = 0; i < N; ++i) {
        vecK[i] = gaussKernel(vecD[i], scale);
    }
    double *colSums = X_proj + N; // still allocated!
    rowSumsSymVec(vecK, n, 1.0, colSums); // row sums = column sums by symmetry.

    // Compute weighted responses of first and second moment, aka y1 and L.
    double *y1 = X_proj + N + n;
    double *L = X_proj + N + (2 * n);
    // Allocate the X_diff scaling vector `vecS` outside the `X_proj` memory
    // area because it is used simultaneously with X_proj in the final
    // gradient computation.
    double *vecS = (double*)malloc(N * sizeof(double));
    weightedYandLoss(n, Y, vecD, vecK, colSums, y1, L, vecS, loss);

    // Compute the gradient using X_proj for the intermediate scaled X_diff.
    rowSweep(X_diff, N, p, "*", vecS, X_proj);
    // Reuse Q which has the required dimension (p, p).
    crossprod(X_diff, N, p, X_proj, N, p, Q);
    // Product with V.
    matrixprod(Q, p, p, V, p, q, G);
    // And final scaling (TODO: move into matrixprod!).
    scale = -2.0 / (((double)n) * h * h);
    N = p * q;
    for (i = 0; i < N; ++i) {
        G[i] *= scale;
    }

    free(vecS);
    free(X_proj);
    free(vecD);
    free(Q);
}
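
// Putting the pieces together, the returned gradient is
//      G = -2 / (n h^2) * t(X_diff) %*% (vecS * X_diff) %*% V
// where `vecS * X_diff` scales each row (pair) of X_diff by the factor
// computed in weightedYandLoss().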

@ -1,219 +0,0 @@
#ifndef CVE_INCLUDE_GUARD_
#define CVE_INCLUDE_GUARD_

#include <Rinternals.h>

#define CVE_MEM_CHUNK_SMALL 1016
#define CVE_MEM_CHUNK_SIZE 2032

void rowSums(const double *A,
             const int nrow, const int ncol,
             double *sum);
SEXP R_rowSums(SEXP A) {
    SEXP sums = PROTECT(allocVector(REALSXP, nrows(A)));

    rowSums(REAL(A), nrows(A), ncols(A), REAL(sums));

    UNPROTECT(1);
    return sums;
}
void rowSumsV2(const double *A,
               const int nrow, const int ncol,
               double *sum);
SEXP R_rowSumsV2(SEXP A) {
    SEXP sums = PROTECT(allocVector(REALSXP, nrows(A)));

    rowSumsV2(REAL(A), nrows(A), ncols(A), REAL(sums));

    UNPROTECT(1);
    return sums;
}
void rowSumsV3(const double *A,
               const int nrow, const int ncol,
               double *sum);
SEXP R_rowSumsV3(SEXP A) {
    SEXP sums = PROTECT(allocVector(REALSXP, nrows(A)));

    rowSumsV3(REAL(A), nrows(A), ncols(A), REAL(sums));

    UNPROTECT(1);
    return sums;
}

void colSums(const double *A,
             const int nrow, const int ncol,
             double *sum);
SEXP R_colSums(SEXP A) {
    SEXP sums = PROTECT(allocVector(REALSXP, ncols(A)));

    colSums(REAL(A), nrows(A), ncols(A), REAL(sums));

    UNPROTECT(1);
    return sums;
}

void rowSquareSums(const double*, const int, const int, double*);
SEXP R_rowSquareSums(SEXP A) {
    SEXP result = PROTECT(allocVector(REALSXP, nrows(A)));

    rowSquareSums(REAL(A), nrows(A), ncols(A), REAL(result));

    UNPROTECT(1);
    return result;
}

void rowSumsSymVec(const double *Avec, const int nrow,
                   const double diag,
                   double *sum);
SEXP R_rowSumsSymVec(SEXP Avec, SEXP nrow, SEXP diag) {
    SEXP sum = PROTECT(allocVector(REALSXP, *INTEGER(nrow)));

    rowSumsSymVec(REAL(Avec), *INTEGER(nrow), *REAL(diag), REAL(sum));

    UNPROTECT(1);
    return sum;
}

void rowSweep(const double *A, const int nrow, const int ncol,
              const char* op,
              const double *v, // vector of length nrow
              double *C);
SEXP R_rowSweep(SEXP A, SEXP v, SEXP op) {
    SEXP C = PROTECT(allocMatrix(REALSXP, nrows(A), ncols(A)));

    rowSweep(REAL(A), nrows(A), ncols(A),
             CHAR(STRING_ELT(op, 0)),
             REAL(v), REAL(C));

    UNPROTECT(1);
    return C;
}

void transpose(const double *A, const int nrow, const int ncol, double* T);
SEXP R_transpose(SEXP A) {
    SEXP T = PROTECT(allocMatrix(REALSXP, ncols(A), nrows(A)));

    transpose(REAL(A), nrows(A), ncols(A), REAL(T));

    UNPROTECT(1); /* T */
    return T;
}

void sympMV(const double* vecA, const int nrow, const double* x, double* y);
SEXP R_sympMV(SEXP vecA, SEXP x) {
    SEXP y = PROTECT(allocVector(REALSXP, length(x)));

    sympMV(REAL(vecA), length(x), REAL(x), REAL(y));

    UNPROTECT(1); /* y */
    return y;
}

void matrixprod(const double *A, const int nrowA, const int ncolA,
                const double *B, const int nrowB, const int ncolB,
                double *C);
SEXP R_matrixprod(SEXP A, SEXP B) {
    SEXP C = PROTECT(allocMatrix(REALSXP, nrows(A), ncols(B)));

    matrixprod(REAL(A), nrows(A), ncols(A),
               REAL(B), nrows(B), ncols(B),
               REAL(C));

    UNPROTECT(1);
    return C;
}

void crossprod(const double* A, const int nrowA, const int ncolA,
               const double* B, const int nrowB, const int ncolB,
               double* C);
SEXP R_crossprod(SEXP A, SEXP B) {
    SEXP C = PROTECT(allocMatrix(REALSXP, ncols(A), ncols(B)));

    crossprod(REAL(A), nrows(A), ncols(A),
              REAL(B), nrows(B), ncols(B),
              REAL(C));

    UNPROTECT(1);
    return C;
}

void kronecker(const double *A, const int nrowA, const int ncolA,
               const double *B, const int nrowB, const int ncolB,
               const char *op,
               double *C);
SEXP R_kronecker(SEXP A, SEXP B, SEXP op) {
    SEXP C = PROTECT(allocMatrix(REALSXP,
                                 nrows(A) * nrows(B),
                                 ncols(A) * ncols(B)));

    kronecker(REAL(A), nrows(A), ncols(A),
              REAL(B), nrows(B), ncols(B),
              CHAR(STRING_ELT(op, 0)),
              REAL(C));

    UNPROTECT(1);
    return C;
}

void skewSymRank2k(const int n, const int k,
                   double alpha, const double *A, const double *B,
                   double beta,
                   double *C);
SEXP R_skewSymRank2k(SEXP A, SEXP B, SEXP alpha, SEXP beta) {
    SEXP C = PROTECT(allocMatrix(REALSXP, nrows(A), nrows(A)));
    memset(REAL(C), 0, nrows(A) * nrows(A) * sizeof(double));

    skewSymRank2k(nrows(A), ncols(A),
                  *REAL(alpha), REAL(A), REAL(B),
                  *REAL(beta), REAL(C));

    UNPROTECT(1);
    return C;
}

void nullProj(const double* B, const int nrowB, const int ncolB,
              double* Q);
SEXP R_nullProj(SEXP B) {
    SEXP Q = PROTECT(allocMatrix(REALSXP, nrows(B), nrows(B)));

    nullProj(REAL(B), nrows(B), ncols(B), REAL(Q));

    UNPROTECT(1);
    return Q;
}

void rangePairs(const int from, const int to, int *pairs);
SEXP R_rangePairs(SEXP from, SEXP to) {
    int start = asInteger(from);
    int end = asInteger(to) + 1;
    int n = end - start;

    SEXP out = PROTECT(allocMatrix(INTSXP, 2, n * (n - 1) / 2));
    rangePairs(start, end, INTEGER(out));

    UNPROTECT(1);
    return out;
}

void grad(const int n, const int p, const int q,
          const double *X,
          const double *X_diff,
          const double *Y,
          const double *V,
          const double h,
          double *G, double *const loss);
SEXP R_grad(SEXP X, SEXP X_diff, SEXP Y, SEXP V, SEXP h) {
    int N = (nrows(X) * (nrows(X) - 1)) / 2;

    SEXP G = PROTECT(allocMatrix(REALSXP, nrows(V), ncols(V)));
    SEXP loss = PROTECT(allocVector(REALSXP, 1));

    grad(nrows(X), ncols(X), ncols(V),
         REAL(X), REAL(X_diff), REAL(Y), REAL(V), *REAL(h),
         REAL(G), REAL(loss));

    UNPROTECT(2);
    return G;
}

#endif /* CVE_INCLUDE_GUARD_ */