diff --git a/CVE_C/R/CVE.R b/CVE_C/R/CVE.R
index bd41ae8..0537bb7 100644
--- a/CVE_C/R/CVE.R
+++ b/CVE_C/R/CVE.R
@@ -1,59 +1,70 @@
-#' Conditional Variance Estimator (CVE)
+#' Conditional Variance Estimator (CVE) Package.
 #'
-#' Conditional Variance Estimator for Sufficient Dimension
-#' Reduction
+#' Conditional Variance Estimation (CVE) is a novel sufficient dimension
+#' reduction (SDR) method for regressions satisfying \eqn{E(Y|X) = E(Y|B'X)},
+#' where \eqn{B'X} is a lower dimensional projection of the predictors. CVE,
+#' similarly to its main competitor, the mean average variance estimation
+#' (MAVE), is not based on inverse regression, and does not require the
+#' restrictive linearity and constant variance conditions of moment based SDR
+#' methods. CVE is data-driven and applies to additive error regressions with
+#' continuous predictors and link function. The effectiveness and accuracy of
+#' CVE compared to MAVE and other SDR techniques is demonstrated in simulation
+#' studies. CVE is shown to outperform MAVE in some model set-ups, while it
+#' remains largely on par under most others.
 #'
-#' TODO: And some details
+#' Let \eqn{Y} denote a univariate response and \eqn{X} a real
+#' \eqn{p}-dimensional covariate vector. We assume that the dependence of
+#' \eqn{Y} and \eqn{X} is modelled by
+#' \deqn{Y = g(B'X) + \epsilon}
+#' where \eqn{X} is independent of \eqn{\epsilon} with positive definite
+#' variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
+#' zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
+#' is an unknown, continuous non-constant function,
+#' and \eqn{B = (b_1, ..., b_k)} is
+#' a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}.
+#' Without loss of generality \eqn{B} is assumed to be orthonormal.
 #'
+#' @author Daniel Kapla, Lukas Fertl, Efstathia Bura
+#' @references Fertl Lukas, Bura Efstathia. Conditional Variance Estimation for
+#'      Sufficient Dimension Reduction, 2019
 #'
-#' @references Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
-#'
+#' @importFrom stats model.frame
 #' @docType package
-#' @author Loki
 #' @useDynLib CVE, .registration = TRUE
 "_PACKAGE"

-#' Implementation of the CVE method.
+#' Conditional Variance Estimator (CVE).
 #'
-#' Conditional Variance Estimator (CVE) is a novel sufficient dimension
-#' reduction (SDR) method assuming a model
-#' \deqn{Y \sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon}
-#' where B'X is a lower dimensional projection of the predictors.
+#' Conditional Variance Estimation (CVE) is a novel sufficient dimension
+#' reduction (SDR) method for regressions satisfying
+#' \eqn{E(Y|X) = E(Y|B'X)}, where \eqn{B'X} is a lower dimensional projection
+#' of the predictors.
 #'
-#' @param formula Formel for the regression model defining `X`, `Y`.
-#'    See: \code{\link{formula}}.
-#' @param data data.frame holding data for formula.
-#' @param method The different only differe in the used optimization.
-#'    All of them are Gradient based optimization on a Stiefel manifold.
+#' @param formula an object of class \code{"formula"} which is a symbolic
+#'      description of the model to be fitted.
+#' @param data an optional data frame containing the data for the formula,
+#'      if supplied.
+#' @param method specifies the CVE method variation as one of
 #'    \itemize{
-#'       \item "simple" Simple reduction of stepsize.
-#'       \item "sgd" stocastic gradient decent.
-#'       \item TODO: further
+#'       \item "simple" exact implementation as described in the paper.
+#'       \item "weighted" variation with adaptive weighting of slices.
 #'    }
-#' @param ... Further parameters depending on the used method.
+#' @param ... Parameters passed on to \code{cve.call}.
 #' @examples
 #' library(CVE)
 #'
-#' # sample dataset
-#' ds <- dataset("M5")
+#' # create dataset
+#' n <- 200
+#' p <- 12
+#' X <- matrix(rnorm(n * p), n, p)
+#' B <- cbind(c(1, rep(0, p - 1)), c(0, 1, rep(0, p - 2)))
+#' Y <- X %*% B
+#' Y <- Y[, 1L]^2 + Y[, 2L]^2 + rnorm(n, 0, 0.3)
 #'
-#' # call ´cve´ with default method (aka "simple")
-#' dr.simple <- cve(ds$Y ~ ds$X, k = ncol(ds$B))
-#' # plot optimization history (loss via iteration)
-#' plot(dr.simple, main = "CVE M5 simple")
+#' # Call the CVE method.
+#' dr <- cve(Y ~ X)
+#' round(dr[[2]]$B, 1)
 #'
-#' # call ´cve´ with method "linesearch" using ´data.frame´ as data.
-#' data <- data.frame(Y = ds$Y, X = ds$X)
-#' # Note: ´Y, X´ are NOT defined, they are extracted from ´data´.
-#' dr.linesearch <- cve(Y ~ ., data, method = "linesearch", k = ncol(ds$B))
-#' plot(dr.linesearch, main = "CVE M5 linesearch")
-#'
-#' @references Fertl L., Bura E. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
-#'
-#' @seealso \code{\link{formula}}. For a complete parameters list (dependent on
-#'    the method) see \code{\link{cve_simple}}, \code{\link{cve_sgd}}
-#' @import stats
-#' @importFrom stats model.frame
+#' @seealso For a detailed description of the \code{formula} parameter see
+#'      [\code{\link{formula}}].
 #' @export
 cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
     # check for type of `data` if supplied and set default
@@ -76,12 +87,19 @@ cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
     return(dr)
 }

-#' @param nObs as describet in the Paper.
-#' @param X Data
-#' @param Y Responces
-#' @param nObs Like in the paper.
-#' @param k guess for SDR dimension.
-#' @param ... Method specific parameters.
+#' @param nObs parameter for choosing bandwidth \code{h} using
+#'      \code{\link{estimate.bandwidth}} (ignored if \code{h} is supplied).
+#' @param X data matrix with samples in its rows.
+#' @param Y Responses (1-dimensional).
+#' @param k Dimension of the lower dimensional projection; if given, only the
+#'      specified dimension is estimated.
+#' @param min.dim lower bound for \code{k} (ignored if \code{k} is supplied).
+#' @param max.dim upper bound for \code{k} (ignored if \code{k} is supplied).
+#' @param tau Initial step-size.
+#' @param tol Tolerance for break condition.
+#' @param epochs maximum number of optimization steps.
+#' @param attempts number of different starting points for the optimization.
+#' @param logger a logger function (only for advanced users).
 #' @rdname cve
 #' @export
 cve.call <- function(X, Y, method = "simple",
@@ -122,9 +140,16 @@ cve.call <- function(X, Y, method = "simple",
         stop("'max.dim' (or 'k') must be smaller than 'ncol(X)'.")
     }

-    if (is.function(h)) {
+    if (missing(h) || is.null(h)) {
+        estimate <- TRUE
+    } else if (is.function(h)) {
+        estimate <- TRUE
         estimate.bandwidth <- h
-        h <- NULL
+    } else if (is.numeric(h) && h > 0.0) {
+        estimate <- FALSE
+        h <- as.double(h)
+    } else {
+        stop("Bandwidth 'h' must be a positive number.")
     }

     if (!is.numeric(tau) || length(tau) > 1L || tau <= 0.0) {
@@ -167,12 +192,8 @@ cve.call <- function(X, Y, method = "simple",

     dr <- list()
     for (k in min.dim:max.dim) {
-        if (missing(h) || is.null(h)) {
+        if (estimate) {
             h <- estimate.bandwidth(X, k, nObs)
-        } else if (is.numeric(h) && h > 0.0) {
-            h <- as.double(h)
-        } else {
-            stop("Bandwidth 'h' must be positive numeric.")
         }

         if (method == 'simple') {
@@ -206,32 +227,22 @@ cve.call <- function(X, Y, method = "simple",
     }

     # augment result information
+    dr$X <- X
+    dr$Y <- Y
     dr$method <- method
     dr$call <- call
     class(dr) <- "cve"
     return(dr)
 }

-# TODO: write summary
-# summary.cve <- function() {
-#    # code #
-# }
-
-#' Ploting helper for objects of class \code{cve}.
+#' Creates a kink plot of the sample loss distribution over SDR dimensions.
 #'
-#' @param x Object of class \code{cve} (result of [cve()]).
-#' @param content Specifies what to plot:
-#' \itemize{
-#'      \item "history" Plots the loss history from stiefel optimization
-#'          (default).
-#'      \item ... TODO: add (if there are any)
-#' }
-#' @param ... Pass through parameters to [plot()] and [lines()]
+#' @param x Object of class \code{"cve"} (result of [\code{\link{cve}}]).
+#' @param ... Pass through parameters to [\code{\link{plot}}] and
+#'      [\code{\link{lines}}].
 #'
-#' @usage ## S3 method for class 'cve'
-#' plot(x, content = "history", ...)
 #' @seealso see \code{\link{par}} for graphical parameters to pass through
-#'      as well as \code{\link{plot}} for standard plot utility.
+#'      as well as \code{\link{plot}}, the standard plot utility.
 #' @importFrom graphics plot lines points
 #' @method plot cve
 #' @export
@@ -244,12 +255,12 @@ plot.cve <- function(x, ...) {
             L <- c(L, dr.k$L)
         }
     }
-    L <- matrix(L, ncol = length(k))
-    boxplot(L, main = "Loss ...",
-            xlab = "SDR dimension k",
-            ylab = expression(L(V, X[i])),
+    L <- matrix(L, ncol = length(k)) / var(x$Y)
+    boxplot(L, main = "Kink plot",
+            xlab = "SDR dimension",
+            ylab = "Sample loss distribution",
             names = k)
-
+    # lines(apply(L, 2, mean)) # TODO: ?
 }

 #' Prints a summary of a \code{cve} result.
diff --git a/CVE_C/R/datasets.R b/CVE_C/R/datasets.R
index 68af0f4..1e9380d 100644
--- a/CVE_C/R/datasets.R
+++ b/CVE_C/R/datasets.R
@@ -23,13 +23,15 @@
 #' The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace
 #' dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points.
 #' The link function \eqn{g} is given as
-#' \deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + 0.5\epsilon}{g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + 0.5 epsilon}
+#' \deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + \epsilon / 2}{%
+#'      g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + epsilon / 2}
 #' @section M2:
-#' \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a default of \eqn{n = 200} data points.
+#' \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} and a
+#' default of \eqn{n = 200} data points.
 #' The link function \eqn{g} is given as
-#' \deqn{g(x) = x_1 x_2^2 + 0.5\epsilon}{g(x) = x_1 x_2^2 + 0.5 epsilon}
+#' \deqn{g(x) = (b_1^T X) (b_2^T X)^2 + \epsilon / 2}
 #' @section M3:
-#' TODO:
+#' \deqn{g(x) = \cos(b_1^T X) + \epsilon / 2}
 #' @section M4:
 #' TODO:
 #' @section M5:
diff --git a/CVE_C/R/estimateBandwidth.R b/CVE_C/R/estimateBandwidth.R
index 8e7edc7..b03538e 100644
--- a/CVE_C/R/estimateBandwidth.R
+++ b/CVE_C/R/estimateBandwidth.R
@@ -1,17 +1,16 @@
-#' Estimated bandwidth for CVE.
+#' Bandwidth estimation for CVE.
 #'
-#' Estimates a proper bandwidth \code{h} according to
+#' Estimates a proper bandwidth \code{h} according to
 #' \deqn{%
-#' h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{%
-#' h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p}
+#' h = \chi_{k}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{%
+#' h = qchisq( (nObs - 1)/(n - 1), k ) * (2 tr(Sigma) / p)}
+#' with \eqn{n} the number of samples and \eqn{p} their dimension
+#' (\code{n <- nrow(X); p <- ncol(X)}) and \eqn{\Sigma} the covariance matrix
+#' given by the standard maximum likelihood estimate.
 #'
-#' @param X data matrix of dimension (n x p) with n data points X_i of dimension
-#'    q. Therefor each row represents a datapoint of dimension p.
-#' @param k Guess for rank(B).
-#' @param nObs Ether numeric of a function. If specified as numeric value
-#'    its used in the computation of the bandwidth directly. If its a function
-#'    `nObs` is evaluated as \code{nObs(nrow(x))}. The default behaviou if not
-#'    supplied at all is to use \code{nObs <- nrow(x)^0.5}.
+#' @param nObs Expected number of points in a slice, see paper.
+#' @param X data matrix with samples in its rows.
+#' @param k Dimension of lower dimensional projection.
 #'
 #' @seealso [\code{\link{qchisq}}]
 #' @export
 estimate.bandwidth <- function(X, k, nObs) {
     n <- nrow(X)
     p <- ncol(X)

-    X_centered <- scale(X, center=TRUE, scale=FALSE)
+    X_centered <- scale(X, center = TRUE, scale = FALSE)
     Sigma <- (1 / n) * t(X_centered) %*% X_centered

     quantil <- qchisq((nObs - 1) / (n - 1), k)
diff --git a/CVE_C/man/CVE-package.Rd b/CVE_C/man/CVE-package.Rd
index 8bb0f5d..52b6468 100644
--- a/CVE_C/man/CVE-package.Rd
+++ b/CVE_C/man/CVE-package.Rd
@@ -4,17 +4,37 @@
 \name{CVE-package}
 \alias{CVE}
 \alias{CVE-package}
-\title{Conditional Variance Estimator (CVE)}
+\title{Conditional Variance Estimator (CVE) Package.}
 \description{
-Conditional Variance Estimator for Sufficient Dimension
-    Reduction
+Conditional Variance Estimation (CVE) is a novel sufficient dimension
+reduction (SDR) method for regressions satisfying \eqn{E(Y|X) = E(Y|B'X)},
+where \eqn{B'X} is a lower dimensional projection of the predictors. CVE,
+similarly to its main competitor, the mean average variance estimation
+(MAVE), is not based on inverse regression, and does not require the
+restrictive linearity and constant variance conditions of moment based SDR
+methods. CVE is data-driven and applies to additive error regressions with
+continuous predictors and link function. The effectiveness and accuracy of
+CVE compared to MAVE and other SDR techniques is demonstrated in simulation
+studies. CVE is shown to outperform MAVE in some model set-ups, while it
+remains largely on par under most others.
 }
 \details{
-TODO: And some details
+Let \eqn{Y} denote a univariate response and \eqn{X} a real
+\eqn{p}-dimensional covariate vector. We assume that the dependence of
+\eqn{Y} and \eqn{X} is modelled by
+\deqn{Y = g(B'X) + \epsilon}
+where \eqn{X} is independent of \eqn{\epsilon} with positive definite
+variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
+zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
+is an unknown, continuous non-constant function,
+and \eqn{B = (b_1, ..., b_k)} is
+a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}.
+Without loss of generality \eqn{B} is assumed to be orthonormal.
 }
 \references{
-Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
+Fertl Lukas, Bura Efstathia. Conditional Variance Estimation for
+Sufficient Dimension Reduction, 2019
 }
 \author{
-Loki
+Daniel Kapla, Lukas Fertl, Efstathia Bura
 }
diff --git a/CVE_C/man/cve.Rd b/CVE_C/man/cve.Rd
index cb8e03e..03674d0 100644
--- a/CVE_C/man/cve.Rd
+++ b/CVE_C/man/cve.Rd
@@ -3,7 +3,7 @@
 \name{cve}
 \alias{cve}
 \alias{cve.call}
-\title{Implementation of the CVE method.}
+\title{Conditional Variance Estimator (CVE).}
 \usage{
 cve(formula, data, method = "simple", max.dim = 10L, ...)

 cve.call(X, Y, method = "simple", nObs = sqrt(nrow(X)), h = NULL,
   epochs = 50L, attempts = 10L, logger = NULL)
 }
 \arguments{
-\item{formula}{Formel for the regression model defining `X`, `Y`.
-See: \code{\link{formula}}.}
+\item{formula}{an object of class \code{"formula"} which is a symbolic
+description of the model to be fitted.}

-\item{data}{data.frame holding data for formula.}
+\item{data}{an optional data frame containing the data for the formula,
+if supplied.}

-\item{method}{The different only differe in the used optimization.
-    All of them are Gradient based optimization on a Stiefel manifold.
+\item{method}{specifies the CVE method variation as one of
 \itemize{
-    \item "simple" Simple reduction of stepsize.
-    \item "sgd" stocastic gradient decent.
-    \item TODO: further
+    \item "simple" exact implementation as described in the paper.
+    \item "weighted" variation with adaptive weighting of slices.
 }}

-\item{...}{Further parameters depending on the used method.}
+\item{max.dim}{upper bound for \code{k} (ignored if \code{k} is supplied).}

-\item{X}{Data}
+\item{...}{Parameters passed on to \code{cve.call}.}

-\item{Y}{Responces}
+\item{X}{data matrix with samples in its rows.}

-\item{nObs}{as describet in the Paper.}
+\item{Y}{Responses (1-dimensional).}

-\item{k}{guess for SDR dimension.}
+\item{nObs}{parameter for choosing bandwidth \code{h} using
+\code{\link{estimate.bandwidth}} (ignored if \code{h} is supplied).}

-\item{nObs}{Like in the paper.}
+\item{min.dim}{lower bound for \code{k} (ignored if \code{k} is supplied).}

-\item{...}{Method specific parameters.}
+\item{k}{Dimension of the lower dimensional projection; if given, only the
+specified dimension is estimated.}
+
+\item{tau}{Initial step-size.}
+
+\item{tol}{Tolerance for break condition.}
+
+\item{epochs}{maximum number of optimization steps.}
+
+\item{attempts}{number of different starting points for the optimization.}
+
+\item{logger}{a logger function (only for advanced users).}
 }
 \description{
-Conditional Variance Estimator (CVE) is a novel sufficient dimension
-reduction (SDR) method assuming a model
-\deqn{Y \sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon}
-where B'X is a lower dimensional projection of the predictors.
+Conditional Variance Estimation (CVE) is a novel sufficient dimension
+reduction (SDR) method for regressions satisfying
+\eqn{E(Y|X) = E(Y|B'X)}, where \eqn{B'X} is a lower dimensional projection
+of the predictors.
 }
 \examples{
 library(CVE)

-# sample dataset
-ds <- dataset("M5")
+# create dataset
+n <- 200
+p <- 12
+X <- matrix(rnorm(n * p), n, p)
+B <- cbind(c(1, rep(0, p - 1)), c(0, 1, rep(0, p - 2)))
+Y <- X \%*\% B
+Y <- Y[, 1L]^2 + Y[, 2L]^2 + rnorm(n, 0, 0.3)

-# call ´cve´ with default method (aka "simple")
-dr.simple <- cve(ds$Y ~ ds$X, k = ncol(ds$B))
-# plot optimization history (loss via iteration)
-plot(dr.simple, main = "CVE M5 simple")
+# Call the CVE method.
+dr <- cve(Y ~ X)
+round(dr[[2]]$B, 1)

-# call ´cve´ with method "linesearch" using ´data.frame´ as data.
-data <- data.frame(Y = ds$Y, X = ds$X)
-# Note: ´Y, X´ are NOT defined, they are extracted from ´data´.
-dr.linesearch <- cve(Y ~ ., data, method = "linesearch", k = ncol(ds$B))
-plot(dr.linesearch, main = "CVE M5 linesearch")
-
-}
-\references{
-Fertl L., Bura E. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
 }
 \seealso{
-\code{\link{formula}}. For a complete parameters list (dependent on
-    the method) see \code{\link{cve_simple}}, \code{\link{cve_sgd}}
+For a detailed description of the \code{formula} parameter see
+    [\code{\link{formula}}].
 }
diff --git a/CVE_C/man/dataset.Rd b/CVE_C/man/dataset.Rd
index da7be5b..d68db13 100644
--- a/CVE_C/man/dataset.Rd
+++ b/CVE_C/man/dataset.Rd
@@ -37,19 +37,21 @@ The general model is given by:
 The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace
 dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points.
 The link function \eqn{g} is given as
-\deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + 0.5\epsilon}{g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + 0.5 epsilon}
+\deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + \epsilon / 2}{%
+    g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + epsilon / 2}
 }

 \section{M2}{
-\eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a default of \eqn{n = 200} data points.
+\eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} and a
+default of \eqn{n = 200} data points.
 The link function \eqn{g} is given as
-\deqn{g(x) = x_1 x_2^2 + 0.5\epsilon}{g(x) = x_1 x_2^2 + 0.5 epsilon}
+\deqn{g(x) = (b_1^T X) (b_2^T X)^2 + \epsilon / 2}
 }

 \section{M3}{
-TODO:
+\deqn{g(x) = \cos(b_1^T X) + \epsilon / 2}
 }

 \section{M4}{
diff --git a/CVE_C/man/estimate.bandwidth.Rd b/CVE_C/man/estimate.bandwidth.Rd
index 4b2ae94..882d994 100644
--- a/CVE_C/man/estimate.bandwidth.Rd
+++ b/CVE_C/man/estimate.bandwidth.Rd
@@ -2,26 +2,25 @@
 % Please edit documentation in R/estimateBandwidth.R
 \name{estimate.bandwidth}
 \alias{estimate.bandwidth}
-\title{Estimated bandwidth for CVE.}
+\title{Bandwidth estimation for CVE.}
 \usage{
 estimate.bandwidth(X, k, nObs)
 }
 \arguments{
-\item{X}{data matrix of dimension (n x p) with n data points X_i of dimension
-q. Therefor each row represents a datapoint of dimension p.}
+\item{X}{data matrix with samples in its rows.}

-\item{k}{Guess for rank(B).}
+\item{k}{Dimension of lower dimensional projection.}

-\item{nObs}{Ether numeric of a function. If specified as numeric value
-its used in the computation of the bandwidth directly. If its a function
-`nObs` is evaluated as \code{nObs(nrow(x))}. The default behaviou if not
-supplied at all is to use \code{nObs <- nrow(x)^0.5}.}
+\item{nObs}{Expected number of points in a slice, see paper.}
 }
 \description{
-Estimates a proper bandwidth \code{h} according to
+Estimates a proper bandwidth \code{h} according to
 \deqn{%
-h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{%
-h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p}
+h = \chi_{k}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{%
+h = qchisq( (nObs - 1)/(n - 1), k ) * (2 tr(Sigma) / p)}
+with \eqn{n} the number of samples and \eqn{p} their dimension
+(\code{n <- nrow(X); p <- ncol(X)}) and \eqn{\Sigma} the covariance matrix
+given by the standard maximum likelihood estimate.
 }
 \seealso{
 [\code{\link{qchisq}}]
diff --git a/CVE_C/man/plot.cve.Rd b/CVE_C/man/plot.cve.Rd
index 35f6921..1fd71b2 100644
--- a/CVE_C/man/plot.cve.Rd
+++ b/CVE_C/man/plot.cve.Rd
@@ -2,27 +2,20 @@
 % Please edit documentation in R/CVE.R
 \name{plot.cve}
 \alias{plot.cve}
-\title{Ploting helper for objects of class \code{cve}.}
+\title{Creates a kink plot of the sample loss distribution over SDR dimensions.}
 \usage{
-## S3 method for class 'cve'
-plot(x, content = "history", ...)
+\method{plot}{cve}(x, ...)
 }
 \arguments{
-\item{x}{Object of class \code{cve} (result of [cve()]).}
+\item{x}{Object of class \code{"cve"} (result of [\code{\link{cve}}]).}

-\item{...}{Pass through parameters to [plot()] and [lines()]}
-
-\item{content}{Specifies what to plot:
-\itemize{
-    \item "history" Plots the loss history from stiefel optimization
-    (default).
-    \item ... TODO: add (if there are any)
-}}
+\item{...}{Pass through parameters to [\code{\link{plot}}] and
+[\code{\link{lines}}].}
 }
 \description{
-Ploting helper for objects of class \code{cve}.
+Creates a kink plot of the sample loss distribution over SDR dimensions.
 }
 \seealso{
 see \code{\link{par}} for graphical parameters to pass through
-    as well as \code{\link{plot}} for standard plot utility.
+    as well as \code{\link{plot}}, the standard plot utility.
 }
diff --git a/LaTeX/notes.tex b/LaTeX/notes.tex
index 5379a9f..acd4690 100644
--- a/LaTeX/notes.tex
+++ b/LaTeX/notes.tex
@@ -3,6 +3,7 @@
 \usepackage[utf8]{inputenc}
 \usepackage[T1]{fontenc}
 \usepackage{amsmath, amsfonts, amssymb, amsthm}
+\usepackage{tikz}
 \usepackage{fullpage}

 \newcommand{\vecl}{\ensuremath{\operatorname{vec}_l}}
@@ -43,4 +44,18 @@ The relation between the matrix indices $i,j$ and the $\vecl$ index $k$ is given
     (\vecl(S)_k = s_{i,j} \quad\Leftrightarrow\quad k = jn+i) : j \in \{0,...,n-2\} \land j < i < n.
\end{displaymath} +\begin{center} + \begin{tikzpicture}[xscale=1,yscale=-1] + % \foreach \i in {0,...,5} { + % \node at ({mod(\i, 3)}, {int(\i / 3)}) {$\i$}; + % } + \foreach \i in {1,...,4} { + \foreach \j in {1,...,\i} { + \node at (\j, \i) {$\i,\j$}; + } + } + + \end{tikzpicture} +\end{center} + \end{document} \ No newline at end of file diff --git a/benchmark.R b/benchmark.R index 4e85c87..f4532e5 100644 --- a/benchmark.R +++ b/benchmark.R @@ -13,7 +13,29 @@ rowSums.c <- function(M) { M <- matrix(as.double(M), nrow = nrow(M)) } - .Call('R_rowSums', PACKAGE = 'wip', M) + .Call('R_rowSums', PACKAGE = 'benchmark', M) +} +rowSumsV2.c <- function(M) { + stopifnot( + is.matrix(M), + is.numeric(M) + ) + if (!is.double(M)) { + M <- matrix(as.double(M), nrow = nrow(M)) + } + + .Call('R_rowSumsV2', PACKAGE = 'benchmark', M) +} +rowSumsV3.c <- function(M) { + stopifnot( + is.matrix(M), + is.numeric(M) + ) + if (!is.double(M)) { + M <- matrix(as.double(M), nrow = nrow(M)) + } + + .Call('R_rowSumsV3', PACKAGE = 'benchmark', M) } colSums.c <- function(M) { stopifnot( @@ -24,7 +46,7 @@ colSums.c <- function(M) { M <- matrix(as.double(M), nrow = nrow(M)) } - .Call('R_colSums', PACKAGE = 'wip', M) + .Call('R_colSums', PACKAGE = 'benchmark', M) } rowSquareSums.c <- function(M) { stopifnot( @@ -35,7 +57,7 @@ rowSquareSums.c <- function(M) { M <- matrix(as.double(M), nrow = nrow(M)) } - .Call('R_rowSquareSums', PACKAGE = 'wip', M) + .Call('R_rowSquareSums', PACKAGE = 'benchmark', M) } rowSumsSymVec.c <- function(vecA, nrow, diag = 0.0) { stopifnot( @@ -47,7 +69,7 @@ rowSumsSymVec.c <- function(vecA, nrow, diag = 0.0) { if (!is.double(vecA)) { vecA <- as.double(vecA) } - .Call('R_rowSumsSymVec', PACKAGE = 'wip', + .Call('R_rowSumsSymVec', PACKAGE = 'benchmark', vecA, as.integer(nrow), as.double(diag)) } rowSweep.c <- function(A, v, op = '-') { @@ -66,7 +88,7 @@ rowSweep.c <- function(A, v, op = '-') { op %in% c('+', '-', '*', '/') ) - .Call('R_rowSweep', PACKAGE = 'wip', A, v, op) + .Call('R_rowSweep', PACKAGE = 'benchmark', A, v, op) } ## row*, col* tests ------------------------------------------------------------ @@ -74,7 +96,15 @@ n <- 3000 M <- matrix(runif(n * 12), n, 12) stopifnot( all.equal(rowSums(M^2), rowSums.c(M^2)), - all.equal(colSums(M), colSums.c(M)) + all.equal(colSums(M), colSums.c(M)), + all.equal(rowSums(M), rowSumsV2.c(M)), + all.equal(rowSums(M), rowSumsV3.c(M)) +) +microbenchmark( + rowSums = rowSums(M), + rowSums.c = rowSums.c(M), + rowSumsV2.c = rowSumsV2.c(M), + rowSumsV3.c = rowSumsV3.c(M) ) microbenchmark( rowSums = rowSums(M^2), @@ -126,7 +156,23 @@ transpose.c <- function(A) { A <- matrix(as.double(A), nrow(A), ncol(A)) } - .Call('R_transpose', PACKAGE = 'wip', A) + .Call('R_transpose', PACKAGE = 'benchmark', A) +} + +sympMV.c <- function(vecA, x) { + stopifnot( + is.vector(vecA), is.numeric(vecA), + is.vector(x), is.numeric(x), + length(x) * (length(x) + 1) == 2 * length(vecA) + ) + if (!is.double(vecA)) { + vecA <- as.double(vecA) + } + if (!is.double(x)) { + x <- as.double(x) + } + + .Call('R_sympMV', PACKAGE = 'benchmark', vecA, x) } matrixprod.c <- function(A, B) { @@ -142,7 +188,7 @@ matrixprod.c <- function(A, B) { B <- matrix(as.double(B), nrow = nrow(B)) } - .Call('R_matrixprod', PACKAGE = 'wip', A, B) + .Call('R_matrixprod', PACKAGE = 'benchmark', A, B) } crossprod.c <- function(A, B) { stopifnot( @@ -157,7 +203,7 @@ crossprod.c <- function(A, B) { B <- matrix(as.double(B), nrow = nrow(B)) } - .Call('R_crossprod', PACKAGE = 'wip', A, B) + .Call('R_crossprod', PACKAGE 
= 'benchmark', A, B)
 }
 skewSymRank2k.c <- function(A, B, alpha = 1, beta = 0) {
     stopifnot(
@@ -174,7 +220,7 @@ skewSymRank2k.c <- function(A, B, alpha = 1, beta = 0) {
         B <- matrix(as.double(B), nrow = nrow(B))
     }

-    .Call('R_skewSymRank2k', PACKAGE = 'wip', A, B,
+    .Call('R_skewSymRank2k', PACKAGE = 'benchmark', A, B,
           as.double(alpha), as.double(beta))
 }

@@ -193,6 +239,18 @@ microbenchmark(
     transpose.c(A)
 )

+Sym <- tcrossprod(runif(n))
+vecSym <- Sym[lower.tri(Sym, diag = T)]
+x <- runif(n)
+stopifnot(all.equal(
+    as.double(Sym %*% x),
+    sympMV.c(vecSym, x)
+))
+microbenchmark(
+    Sym %*% x,
+    sympMV.c = sympMV.c(vecSym, x)
+)
+
 stopifnot(
     all.equal(A %*% B, matrixprod.c(A, B))
 )
@@ -232,7 +290,7 @@ nullProj.c <- function(B) {
         B <- matrix(as.double(B), nrow = nrow(B))
     }

-    .Call('R_nullProj', PACKAGE = 'wip', B)
+    .Call('R_nullProj', PACKAGE = 'benchmark', B)
 }
 ## Orthogonal projection onto null space tests --------------------------------
 p <- 12
@@ -252,7 +310,7 @@ microbenchmark(
 #

 ## WIP for gradient. ----------------------------------------------------------
-gradient.c <- function(X, X_diff, Y, V, h) {
+grad.c <- function(X, X_diff, Y, V, h) {
     stopifnot(
         is.matrix(X), is.double(X),
         is.matrix(X_diff), is.double(X_diff),
@@ -264,7 +322,7 @@ gradient.c <- function(X, X_diff, Y, V, h) {
         is.vector(h), is.numeric(h), length(h) == 1
     )

-    .Call('R_gradient', PACKAGE = 'wip',
+    .Call('R_grad', PACKAGE = 'benchmark',
           X, X_diff, as.double(Y), V, as.double(h));
 }

@@ -294,25 +352,23 @@ grad <- function(X, Y, V, h, persistent = TRUE) {

     # Vectorized distance matrix `D`.
     vecD <- rowSums((X_diff %*% Q)^2)
-
-    # Weight matrix `W` (dnorm ... gaussean density function)
-    W <- matrix(1, n, n) # `exp(0) == 1`
-    W[lower] <- exp((-0.5 / h) * vecD^2) # Set lower tri. part
-    W[upper] <- t.default(W)[upper] # Mirror lower tri. to upper
-    W <- sweep(W, 2, colSums(W), FUN = `/`) # Col-Normalize
+    # Create kernel matrix (i.e. apply the kernel to the distances).
+    K <- matrix(1, n, n) # `exp(0) == 1`
+    K[lower] <- exp((-0.5 / h) * vecD^2) # Set lower tri. part
+    K[upper] <- t(K)[upper] # Mirror lower tri. to upper

     # Weighted `Y` moments
+    colSumsK <- colSums(K)
+    y1 <- (K %*% Y) / colSumsK
+    y2 <- (K %*% Y^2) / colSumsK
     # Per example loss `L(V, X_i)`
     L <- y2 - y1^2
-    # Vectorized Weights with forced symmetry
-    vecS <- (L[i] - (Y[j] - y1[i])^2) * W[lower]
-    vecS <- vecS + ((L[j] - (Y[i] - y1[j])^2) * W[upper])
-    # Compute scaling of `X` row differences
-    vecS <- vecS * vecD
+    # Compute scaling vector `vecS` for `X_diff`.
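+    # `tmp[l, j] = (y1[l] - Y[j])^2`; subtracting it from `L` and weighting
+    # by the kernel gives the matrix S, whose symmetrized lower triangle,
+    # scaled by the squared distances `vecD`, yields `vecS`.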
+    tmp <- kronecker(matrix(y1, n, 1), matrix(Y, 1, n), `-`)^2
+    tmp <- as.vector(L) - tmp
+    tmp <- tmp * K / colSumsK
+    vecS <- (tmp + t(tmp))[lower] * vecD

     G <- crossprod(X_diff, X_diff * vecS) %*% V
     G <- (-2 / (n * h^2)) * G
@@ -340,9 +396,9 @@ X_diff <- X[i, , drop = F] - X[j, , drop = F]

 stopifnot(all.equal(
     grad(X, Y, V, h),
-    gradient.c(X, X_diff, Y, V, h)
+    grad.c(X, X_diff, Y, V, h)
 ))
 microbenchmark(
     grad = grad(X, Y, V, h),
-    gradient.c = gradient.c(X, X_diff, Y, V, h)
+    grad.c = grad.c(X, X_diff, Y, V, h)
 )
diff --git a/benchmark.c b/benchmark.c
index 75c77d6..8b4ca92 100644
--- a/benchmark.c
+++ b/benchmark.c
@@ -6,11 +6,61 @@
 #include
 // #include
-#include "wip.h"
+#include "benchmark.h"

-static inline void rowSums(const double *A,
-                           const int nrow, const int ncol,
-                           double *sum) {
+void rowSums(const double *A,
+             const int nrow, const int ncol,
+             double *sum) {
+    int i, j, block_size, block_size_i;
+    const double *A_block = A;
+    const double *A_end = A + nrow * ncol;
+
+    if (nrow > 508) {
+        block_size = 508;
+    } else {
+        block_size = nrow;
+    }
+
+    // Iterate `(block_size_i, ncol)` submatrix blocks.
+    for (i = 0; i < nrow; i += block_size_i) {
+        // Reset `A` to new block beginning.
+        A = A_block;
+        // Take block size of everything left and reduce to max size.
+        block_size_i = nrow - i;
+        if (block_size_i > block_size) {
+            block_size_i = block_size;
+        }
+        // Copy the block's first column (unrolled by 4, with remainder loop).
+        for (j = 0; j + 3 < block_size_i; j += 4) {
+            sum[j]     = A[j];
+            sum[j + 1] = A[j + 1];
+            sum[j + 2] = A[j + 2];
+            sum[j + 3] = A[j + 3];
+        }
+        for (; j < block_size_i; ++j) {
+            sum[j] = A[j];
+        }
+        // Sum following columns to the first one.
+        for (A += nrow; A < A_end; A += nrow) {
+            for (j = 0; j + 3 < block_size_i; j += 4) {
+                sum[j]     += A[j];
+                sum[j + 1] += A[j + 1];
+                sum[j + 2] += A[j + 2];
+                sum[j + 3] += A[j + 3];
+            }
+            for (; j < block_size_i; ++j) {
+                sum[j] += A[j];
+            }
+        }
+        // Step one block forward.
+        A_block += block_size_i;
+        sum += block_size_i;
+    }
+}
+
+void rowSumsV2(const double *A,
+               const int nrow, const int ncol,
+               double *sum) {
     int i, j, block_size, block_size_i;
     const double *A_block = A;
     const double *A_end = A + nrow * ncol;
@@ -45,33 +95,54 @@ static inline void rowSums(const double *A,
         sum += block_size_i;
     }
 }
+void rowSumsV3(const double *A,
+               const int nrow, const int ncol,
+               double *sum) {
+    int i;
+    double* ones = (double*)malloc(ncol * sizeof(double));

-static inline void colSums(const double *A,
-                           const int nrow, const int ncol,
-                           double *sum) {
-    int j;
-    double *sum_end = sum + ncol;
+    for (i = 0; i < ncol; ++i) {
+        ones[i] = 1.0;
+    }

-    memset(sum, 0, sizeof(double) * ncol);
-    for (; sum < sum_end; ++sum) {
-        for (j = 0; j < nrow; ++j) {
-            *sum += A[j];
+    matrixprod(A, nrow, ncol, ones, ncol, 1, sum);
+    free(ones);
+}
+
+void colSums(const double *A, const int nrow, const int ncol,
+             double *sums) {
+    int i, j, nrowb = 4 * (nrow / 4); // biggest multiple of 4 with nrowb <= nrow.
+    double sum;
+
+    for (j = 0; j < ncol; ++j) {
+        sum = 0.0;
+        for (i = 0; i < nrowb; i += 4) {
+            sum += A[i]
+                 + A[i + 1]
+                 + A[i + 2]
+                 + A[i + 3];
         }
+        for (; i < nrow; ++i) {
+            sum += A[i];
+        }
+        *(sums++) = sum;
         A += nrow;
     }
 }

-static inline void rowSquareSums(const double *A,
-                                 const int nrow, const int ncol,
-                                 double *sum) {
+void rowSquareSums(const double *A,
+                   const int nrow, const int ncol,
+                   double *sum) {
     int i, j, block_size, block_size_i;
     const double *A_block = A;
     const double *A_end = A + nrow * ncol;

-    if (nrow < CVE_MEM_CHUNK_SIZE) {
-        block_size = nrow;
+    if (nrow > 508) {
+        block_size = 508;
     } else {
-        block_size = CVE_MEM_CHUNK_SIZE;
+        block_size = nrow;
     }

     // Iterate `(block_size_i, ncol)` submatrix blocks.
@@ -80,16 +151,29 @@ static inline void rowSquareSums(const double *A,
         A = A_block;
         // Take block size of everything left and reduce to max size.
         block_size_i = nrow - i;
         if (block_size_i > block_size) {
             block_size_i = block_size;
         }
-        // Compute first blocks column,
-        for (j = 0; j < block_size_i; ++j) {
+        // Copy the block's first column (unrolled by 4, with remainder loop).
+        for (j = 0; j + 3 < block_size_i; j += 4) {
+            sum[j]     = A[j]     * A[j];
+            sum[j + 1] = A[j + 1] * A[j + 1];
+            sum[j + 2] = A[j + 2] * A[j + 2];
+            sum[j + 3] = A[j + 3] * A[j + 3];
+        }
+        for (; j < block_size_i; ++j) {
             sum[j] = A[j] * A[j];
         }
-        // and sum the following columns to the first one.
+        // Sum following columns to the first one.
         for (A += nrow; A < A_end; A += nrow) {
-            for (j = 0; j < block_size_i; ++j) {
+            for (j = 0; j + 3 < block_size_i; j += 4) {
+                sum[j]     += A[j]     * A[j];
+                sum[j + 1] += A[j + 1] * A[j + 1];
+                sum[j + 2] += A[j + 2] * A[j + 2];
+                sum[j + 3] += A[j + 3] * A[j + 3];
+            }
+            for (; j < block_size_i; ++j) {
                 sum[j] += A[j] * A[j];
             }
         }
@@ -99,9 +183,9 @@ static inline void rowSquareSums(const double *A,
     }
 }

-static inline void rowSumsSymVec(const double *Avec, const int nrow,
-                                 const double diag,
-                                 double *sum) {
+void rowSumsSymVec(const double *Avec, const int nrow,
+                   const double diag,
+                   double *sum) {
     int i, j;

     if (diag == 0.0) {
@@ -121,10 +205,10 @@ static inline void rowSumsSymVec(const double *Avec, const int nrow,
 }

 /* C[, j] = A[, j] * v for each j = 1 to ncol */
-static void rowSweep(const double *A, const int nrow, const int ncol,
-                     const char* op,
-                     const double *v, // vector of length nrow
-                     double *C) {
+void rowSweep(const double *A, const int nrow, const int ncol,
+              const char* op,
+              const double *v, // vector of length nrow
+              double *C) {
     int i, j, block_size, block_size_i;
     const double *A_block = A;
     double *C_block = C;
@@ -241,9 +325,22 @@ void transpose(const double *A, const int nrow, const int ncol, double* T) {
     }
 }

-static inline void matrixprod(const double *A, const int nrowA, const int ncolA,
-                              const double *B, const int nrowB, const int ncolB,
-                              double *C) {
+// Symmetric packed matrix-vector product.
+// Computes
+//      y <- A x
+// where A is supplied as the packed lower triangular part of a symmetric
+// matrix, i.e. `vecA` is `vec_ltri(A)`.
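+// The column-major packed storage expected by BLAS with uplo = "L" matches
+// the R extraction `Sym[lower.tri(Sym, diag = TRUE)]` used by the check in
+// benchmark.R.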
+void sympMV(const double* vecA, const int nrow, const double* x, double* y) {
+    double one = 1.0;
+    double zero = 0.0;
+    int onei = 1;
+
+    F77_NAME(dspmv)("L", &nrow, &one, vecA, x, &onei, &zero, y, &onei);
+}
+
+void matrixprod(const double *A, const int nrowA, const int ncolA,
+                const double *B, const int nrowB, const int ncolB,
+                double *C) {
     const double one = 1.0;
     const double zero = 0.0;

@@ -254,9 +351,9 @@ static inline void matrixprod(const double *A, const int nrowA, const int ncolA,
               &zero, C, &nrowA);
 }

-static inline void crossprod(const double *A, const int nrowA, const int ncolA,
-                             const double *B, const int nrowB, const int ncolB,
-                             double *C) {
+void crossprod(const double *A, const int nrowA, const int ncolA,
+               const double *B, const int nrowB, const int ncolB,
+               double *C) {
     const double one = 1.0;
     const double zero = 0.0;

@@ -267,8 +364,8 @@ static inline void crossprod(const double *A, const int nrowA, const int ncolA,
               &zero, C, &ncolA);
 }

-static inline void nullProj(const double *B, const int nrowB, const int ncolB,
-                            double *Q) {
+void nullProj(const double *B, const int nrowB, const int ncolB,
+              double *Q) {
     const double minusOne = -1.0;
     const double one = 1.0;

@@ -286,7 +383,7 @@ static inline void nullProj(const double *B, const int nrowB, const int ncolB,
               &one, Q, &nrowB);
 }

-static inline void rangePairs(const int from, const int to, int *pairs) {
+void rangePairs(const int from, const int to, int *pairs) {
     int i, j;
     for (i = from; i < to; ++i) {
         for (j = i + 1; j < to; ++j) {
@@ -300,48 +397,56 @@ static inline void rangePairs(const int from, const int to, int *pairs) {
 // A dense skew-symmetric rank 2 update.
 // Perform the update
 //      C := alpha (A * B^T - B * A^T) + beta C
-static void skewSymRank2k(const int nrow, const int ncol,
-                          double alpha, const double *A, const double *B,
-                          double beta,
-                          double *C) {
+void skewSymRank2k(const int nrow, const int ncol,
+                   double alpha, const double *A, const double *B,
+                   double beta,
+                   double *C) {
     F77_NAME(dgemm)("N", "T",
-                    &nrow, &nrow, &ncol,
-                    &alpha, A, &nrow, B, &nrow,
-                    &beta, C, &nrow);
-
+        &nrow, &nrow, &ncol,
+        &alpha, A, &nrow, B, &nrow,
+        &beta, C, &nrow);
     alpha *= -1.0;
     beta = 1.0;
     F77_NAME(dgemm)("N", "T",
-                    &nrow, &nrow, &ncol,
-                    &alpha, B, &nrow, A, &nrow,
-                    &beta, C, &nrow);
+        &nrow, &nrow, &ncol,
+        &alpha, B, &nrow, A, &nrow,
+        &beta, C, &nrow);
+}
+// Gaussian kernel: evaluates exp(scale * x^2); called with scale = -0.5 / h.
+static inline double gaussKernel(const double x, const double scale) {
+    return exp(scale * x * x);
 }

 // TODO: much potential for optimization!
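+// Accumulates the kernel-weighted first moment y1[i] and second moment L[i]
+// of Y (normalized by colSums), turns L into the per-sample loss
+// L[i] - y1[i]^2, sums up the total loss, and fills the scaling vector vecS
+// consumed by the gradient computation.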
-static inline void weightedYandLoss(const int n,
-                                    const double *Y,
-                                    const double *vecD,
-                                    const double *vecW,
-                                    const double *colSums,
-                                    double *y1, double *L, double *vecS,
-                                    double *const loss) {
+static void weightedYandLoss(const int n,
+                             const double *Y,
+                             const double *vecD,
+                             const double *vecW,
+                             const double *colSums,
+                             double *y1, double *L, double *vecS,
+                             double *loss) {
     int i, j, k, N = n * (n - 1) / 2;
     double l;

     for (i = 0; i < n; ++i) {
-        y1[i] = Y[i] / colSums[i];
-        L[i] = Y[i] * Y[i] / colSums[i];
+        y1[i] = Y[i];
+        L[i] = Y[i] * Y[i];
     }

     for (k = j = 0; j < n; ++j) {
         for (i = j + 1; i < n; ++i, ++k) {
-            y1[i] += Y[j] * vecW[k] / colSums[i];
-            y1[j] += Y[i] * vecW[k] / colSums[j];
-            L[i] += Y[j] * Y[j] * vecW[k] / colSums[i];
-            L[j] += Y[i] * Y[i] * vecW[k] / colSums[j];
+            y1[i] += Y[j] * vecW[k];
+            y1[j] += Y[i] * vecW[k];
+            L[i] += Y[j] * Y[j] * vecW[k];
+            L[j] += Y[i] * Y[i] * vecW[k];
         }
     }

+    for (i = 0; i < n; ++i) {
+        y1[i] /= colSums[i];
+        L[i] /= colSums[i];
+    }
+
     l = 0.0;
     for (i = 0; i < n; ++i) {
         l += (L[i] -= y1[i] * y1[i]);
@@ -362,17 +467,13 @@ static inline void weightedYandLoss(const int n,
     }
 }

-inline double gaussKernel(const double x, const double scale) {
-    return exp(scale * x * x);
-}
-
-static void gradient(const int n, const int p, const int q,
-                     const double *X,
-                     const double *X_diff,
-                     const double *Y,
-                     const double *V,
-                     const double h,
-                     double *G, double *const loss) {
+void grad(const int n, const int p, const int q,
+          const double *X,
+          const double *X_diff,
+          const double *Y,
+          const double *V,
+          const double h,
+          double *G, double *loss) {
     // Number of non-trivial X_i to X_j pairs.
     int i, N = (n * (n - 1)) / 2;
     double scale = -0.5 / h;
@@ -398,12 +499,12 @@ static void gradient(const int n, const int p, const int q,
     rowSquareSums(X_proj, N, p, vecD);

     // Apply kernel to distance vector for weights computation.
-    double *vecW = X_proj; // reuse memory area, no longer needed.
+    double *vecK = X_proj; // reuse memory area, no longer needed.
     for (i = 0; i < N; ++i) {
-        vecW[i] = gaussKernel(vecD[i], scale);
+        vecK[i] = gaussKernel(vecD[i], scale);
     }
     double *colSums = X_proj + N; // still allocated!
-    rowSumsSymVec(vecW, n, 1.0, colSums); // rowSums = colSums cause Sym
+    rowSumsSymVec(vecK, n, 1.0, colSums); // rowSums = colSums because symmetric

     // compute weighted responses of first and second moment, aka y1, y2.
     double *y1 = X_proj + N + n;
     double *L = X_proj + N + (2 * n);
     // Allocate X_diff scaling vector `vecS`, not in `X_proj` mem area because
     // used simultaneously with X_proj in the final gradient computation.
     double *vecS = (double*)malloc(N * sizeof(double));
-    weightedYandLoss(n, Y, vecD, vecW, colSums, y1, L, vecS, loss);
+    weightedYandLoss(n, Y, vecD, vecK, colSums, y1, L, vecS, loss);

     // compute the gradient using X_proj for intermediate scaled X_diff.
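+    // Mirrors the R reference implementation in benchmark.R:
+    //      G <- (-2 / (n * h^2)) * crossprod(X_diff, X_diff * vecS) %*% V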
rowSweep(X_diff, N, p, "*", vecS, X_proj); diff --git a/benchmark.h b/benchmark.h index 1e1f7dc..4ed3e62 100644 --- a/benchmark.h +++ b/benchmark.h @@ -6,9 +6,9 @@ #define CVE_MEM_CHUNK_SMALL 1016 #define CVE_MEM_CHUNK_SIZE 2032 -static inline void rowSums(const double *A, - const int nrow, const int ncol, - double *sum); +void rowSums(const double *A, + const int nrow, const int ncol, + double *sum); SEXP R_rowSums(SEXP A) { SEXP sums = PROTECT(allocVector(REALSXP, nrows(A))); @@ -17,10 +17,32 @@ SEXP R_rowSums(SEXP A) { UNPROTECT(1); return sums; } +void rowSumsV2(const double *A, + const int nrow, const int ncol, + double *sum); +SEXP R_rowSumsV2(SEXP A) { + SEXP sums = PROTECT(allocVector(REALSXP, nrows(A))); -static inline void colSums(const double *A, - const int nrow, const int ncol, - double *sum); + rowSumsV2(REAL(A), nrows(A), ncols(A), REAL(sums)); + + UNPROTECT(1); + return sums; +} +void rowSumsV3(const double *A, + const int nrow, const int ncol, + double *sum); +SEXP R_rowSumsV3(SEXP A) { + SEXP sums = PROTECT(allocVector(REALSXP, nrows(A))); + + rowSumsV3(REAL(A), nrows(A), ncols(A), REAL(sums)); + + UNPROTECT(1); + return sums; +} + +void colSums(const double *A, + const int nrow, const int ncol, + double *sum); SEXP R_colSums(SEXP A) { SEXP sums = PROTECT(allocVector(REALSXP, ncols(A))); @@ -30,7 +52,7 @@ SEXP R_colSums(SEXP A) { return sums; } -static inline void rowSquareSums(const double*, const int, const int, double*); +void rowSquareSums(const double*, const int, const int, double*); SEXP R_rowSquareSums(SEXP A) { SEXP result = PROTECT(allocVector(REALSXP, nrows(A))); @@ -40,9 +62,9 @@ SEXP R_rowSquareSums(SEXP A) { return result; } -static inline void rowSumsSymVec(const double *Avec, const int nrow, - const double diag, - double *sum); +void rowSumsSymVec(const double *Avec, const int nrow, + const double diag, + double *sum); SEXP R_rowSumsSymVec(SEXP Avec, SEXP nrow, SEXP diag) { SEXP sum = PROTECT(allocVector(REALSXP, *INTEGER(nrow))); @@ -52,10 +74,10 @@ SEXP R_rowSumsSymVec(SEXP Avec, SEXP nrow, SEXP diag) { return sum; } -static void rowSweep(const double *A, const int nrow, const int ncol, - const char* op, - const double *v, // vector of length nrow - double *C); +void rowSweep(const double *A, const int nrow, const int ncol, + const char* op, + const double *v, // vector of length nrow + double *C); SEXP R_rowSweep(SEXP A, SEXP v, SEXP op) { SEXP C = PROTECT(allocMatrix(REALSXP, nrows(A), ncols(A))); @@ -77,9 +99,19 @@ SEXP R_transpose(SEXP A) { return T; } -static inline void matrixprod(const double *A, const int nrowA, const int ncolA, - const double *B, const int nrowB, const int ncolB, - double *C); +void sympMV(const double* vecA, const int nrow, const double* x, double* y); +SEXP R_sympMV(SEXP vecA, SEXP x) { + SEXP y = PROTECT(allocVector(REALSXP, length(x))); + + sympMV(REAL(vecA), length(x), REAL(x), REAL(y)); + + UNPROTECT(1); /* y */ + return y; +} + +void matrixprod(const double *A, const int nrowA, const int ncolA, + const double *B, const int nrowB, const int ncolB, + double *C); SEXP R_matrixprod(SEXP A, SEXP B) { SEXP C = PROTECT(allocMatrix(REALSXP, nrows(A), ncols(B))); @@ -91,9 +123,9 @@ SEXP R_matrixprod(SEXP A, SEXP B) { return C; } -static inline void crossprod(const double* A, const int nrowA, const int ncolA, - const double* B, const int ncolB, const int nrowB, - double* C); +void crossprod(const double* A, const int nrowA, const int ncolA, + const double* B, const int ncolB, const int nrowB, + double* C); SEXP R_crossprod(SEXP A, 
SEXP B) {
     SEXP C = PROTECT(allocMatrix(REALSXP, ncols(A), ncols(B)));
@@ -105,10 +137,10 @@ SEXP R_crossprod(SEXP A, SEXP B) {
     return C;
 }

-static void skewSymRank2k(const int n, const int k,
-                          double alpha, const double *A, const double *B,
-                          double beta,
-                          double *C);
+void skewSymRank2k(const int n, const int k,
+                   double alpha, const double *A, const double *B,
+                   double beta,
+                   double *C);
 SEXP R_skewSymRank2k(SEXP A, SEXP B, SEXP alpha, SEXP beta) {
     SEXP C = PROTECT(allocMatrix(REALSXP, nrows(A), nrows(A)));
     memset(REAL(C), 0, nrows(A) * nrows(A) * sizeof(double));
@@ -121,8 +153,8 @@ SEXP R_skewSymRank2k(SEXP A, SEXP B, SEXP alpha, SEXP beta) {
     return C;
 }

-static inline void nullProj(const double* B, const int nrowB, const int ncolB,
-                            double* Q);
+void nullProj(const double* B, const int nrowB, const int ncolB,
+              double* Q);
 SEXP R_nullProj(SEXP B) {
     SEXP Q = PROTECT(allocMatrix(REALSXP, nrows(B), nrows(B)));

@@ -132,7 +164,7 @@ SEXP R_nullProj(SEXP B) {
     return Q;
 }

-static inline void rangePairs(const int from, const int to, int *pairs);
+void rangePairs(const int from, const int to, int *pairs);
 SEXP R_rangePairs(SEXP from, SEXP to) {
     int start = asInteger(from);
     int end = asInteger(to) + 1;
@@ -145,22 +177,22 @@ SEXP R_rangePairs(SEXP from, SEXP to) {
     return out;
 }

-static void gradient(const int n, const int p, const int q,
-                     const double *X,
-                     const double *X_diff,
-                     const double *Y,
-                     const double *V,
-                     const double h,
-                     double *G, double *const loss);
-SEXP R_gradient(SEXP X, SEXP X_diff, SEXP Y, SEXP V, SEXP h) {
+void grad(const int n, const int p, const int q,
+          const double *X,
+          const double *X_diff,
+          const double *Y,
+          const double *V,
+          const double h,
+          double *G, double *loss);
+SEXP R_grad(SEXP X, SEXP X_diff, SEXP Y, SEXP V, SEXP h) {
     int N = (nrows(X) * (nrows(X) - 1)) / 2;
     SEXP G = PROTECT(allocMatrix(REALSXP, nrows(V), ncols(V)));
     SEXP loss = PROTECT(allocVector(REALSXP, 1));

-    gradient(nrows(X), ncols(X), ncols(V),
-             REAL(X), REAL(X_diff), REAL(Y), REAL(V), *REAL(h),
-             REAL(G), REAL(loss));
+    grad(nrows(X), ncols(X), ncols(V),
+         REAL(X), REAL(X_diff), REAL(Y), REAL(V), *REAL(h),
+         REAL(G), REAL(loss));

     UNPROTECT(2);
     return G;
diff --git a/runtime_test.R b/runtime_test.R
index c0ae73b..de85c38 100644
--- a/runtime_test.R
+++ b/runtime_test.R
@@ -16,7 +16,7 @@ subspace.dist <- function(B1, B2){
 }

 # Number of simulations
-SIM.NR <- 20
+SIM.NR <- 50
 # maximal number of iterations in curvilinear search algorithm
 MAXIT <- 50
 # number of arbitrary starting values for curvilinear optimization
@@ -24,7 +24,7 @@ ATTEMPTS <- 10
 # set names of datasets
 dataset.names <- c("M1", "M2", "M3", "M4", "M5")
 # Set used CVE method
-methods <- c("simple") # c("legacy", "simple", "sgd", "linesearch")
+methods <- c("simple") # c("legacy", "simple", "linesearch", "sgd")

 if ("legacy" %in% methods) {
     # Source legacy code (but only if needed)
@@ -43,7 +43,9 @@ log.nr <- length(list.files('tmp/', pattern = '.*\\.log'))
 file <- file.path('tmp', paste0('test', log.nr, '.log'))
 cat('dataset\tmethod\terror\ttime\n', sep = '', file = file)
 # Open a new pdf device for plotting into (do not overwrite existing ones)
-pdf(file.path('tmp', paste0('test', log.nr, '.pdf')))
+path <- paste0('test', log.nr, '.pdf')
+pdf(file.path('tmp', path))
+cat('Plotting to file:', path, '\n') # inform the user (stdout only)

 count <- 0