wip: proper package implementation

2019-08-12 00:33:52 +02:00 · 2019-08-12 00:33:52 +02:00 · b071a689d9
parent 095e463463
commit b071a689d9
17 changed files with 491 additions and 132 deletions
--- a/CVE/NAMESPACE
+++ b/CVE/NAMESPACE
@ -1,10 +1,18 @@
 # Generated by roxygen2: do not edit by hand

+S3method(plot,cve)
 export(cve)
-export(cve_cpp)
+export(cve.call)
 export(dataset)
 export(estimateBandwidth)
 export(rStiefel)
 import(Rcpp)
+import(stats)
 importFrom(Rcpp,evalCpp)
+importFrom(graphics,lines)
+importFrom(graphics,plot)
+importFrom(graphics,points)
+importFrom(stats,model.frame)
+importFrom(stats,rbinom)
+importFrom(stats,rnorm)
 useDynLib(CVE)
--- a/CVE/R/CVE.R
+++ b/CVE/R/CVE.R
@ -1,44 +1,145 @@
-#' Conditional Variance Estimator
+#' Implementation of the CVE method.
 #'
 #' Conditional Variance Estimator (CVE) is a novel sufficient dimension
-#'  reduction (SDR) method for regressions satisfying E(Y|X) = E(Y|B'X),
-#'  where B'X is a lower dimensional projection of the predictors.
+#' reduction (SDR) method assuming a model
+#' \deqn{Y \sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon}
+#' where B'X is a lower dimensional projection of the predictors.
 #'
-#' @param X A matrix of type numeric of dimensions N times dim where N is the number
-#'  of entries with dim as data dimension.
-#' @param Y A vector of type numeric of length N (coresponds to \code{x}).
-#' @param k Guess for rank(B).
-#' @param nObs As describet in the paper.
-#'  
-#' @param tol Tolerance for optimization stopping creteria.
+#' @param formula Formel for the regression model defining `X`, `Y`.
+#'  See: \code{\link{formula}}.
+#' @param data data.frame holding data for formula.
+#' @param method The different only differe in the used optimization.
+#'  All of them are Gradient based optimization on a Stiefel manifold.
+#' \itemize{
+#'      \item "simple" Simple reduction of stepsize.
+#'      \item "linesearch" determines stepsize with backtracking linesearch
+#'          using Armijo-Wolf conditions.
+#'      \item TODO: further
+#' }
+#' @param ... Further parameters depending on the used method.
+#'      TODO: See ...
+#' @examples
+#' library(CVE)
+#' ds <- dataset("M5")
+#' X <- ds$X
+#' Y <- ds$Y
+#' dr <- cve(Y ~ X, k = 1)
+#'
+#' @references Fertl L, Bura E. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
+#'
+#' @import stats
+#' @importFrom stats model.frame
 #' @export
+cve <- function(formula, data, method = "simple", ...) {
+    # check for type of `data` if supplied and set default
+    if (missing(data)) {
+        data <- environment(formula)
+    } else if (!is.data.frame(data)) {
+        stop('Parameter `data` must be a `data.frame` or missing.')
+    }
+
+    # extract `X`, `Y` from `formula` with `data`
+    model <- stats::model.frame(formula, data)
+    X <- as.matrix(model[,-1, drop=FALSE])
+    Y <- as.matrix(model[, 1, drop=FALSE])
+
+    # pass extracted data on to [cve.call()]
+    dr <- cve.call(X, Y, method = method, ...)
+
+    # overwrite `call` property from [cve.call()]
+    dr$call <- match.call()
+    return(dr)
+}
+
+#' @rdname cve
+#' @export
+cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5, k, ...) {
+
+    # TODO: replace default value of `k` by `max.dim` when fast enough
+    if (missing(k)) {
+        stop("TODO: parameter `k` (rank(B)) is required, replace by `max.dim`.")
+    }
+
+    # parameter checking
+    if (!(is.matrix(X) && is.matrix(Y))) {
+        stop('X and Y should be matrices.')
+    }
+    if (nrow(X) != nrow(Y)) {
+        stop('Rows of X and Y are not compatible.')
+    }
+    if (ncol(X) < 2) {
+        stop('X is one dimensional, no need for dimension reduction.')
+    }
+    if (ncol(Y) > 1) {
+        stop('Only one dimensional responces Y are supported.')
+    }
+
+    # call C++ CVE implementation
+    # dr ... Dimension Reduction
+    dr <- cve_cpp(X, Y, tolower(method), k = k, nObs = nObs, ...)
+
+    # augment result information
+    dr$method <- method
+    dr$call <- match.call()
+    class(dr) <- "cve"
+    return(dr)
+}
+
+# TODO: write summary
+# summary.cve <- function() {
+#     # code #
+# }
+
+#' Ploting helper for objects of class \code{cve}.
 #'
-#' @seealso TODO: \code{vignette("CVE_paper", package="CVE")}.
+#' @param x Object of class \code{cve} (result of [cve()]).
+#' @param content Specifies what to plot:
+#' \itemize{
+#'      \item "history" Plots the loss history from stiefel optimization
+#'          (default).
+#'      \item ... TODO: add (if there are any)
+#' }
+#' @param ... Pass through parameters to [plot()] and [lines()]
 #'
-#' @references Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
-cve <- function(X, Y, k,
-                nObs = sqrt(nrow(X)),
-                tauInitial = 1.0,
-                tol = 1e-3,
-                slack = -1e-10,
-                maxIter = 50L,
-                attempts = 10L
-) {
-    # check data parameter types
-    stopifnot(
-        is.matrix(X),
-        is.vector(Y),
-        typeof(X) == 'double',
-        typeof(Y) == 'double'
+#' @seealso see \code{\link{par}} for graphical parameters to pass through.
+#' @importFrom graphics plot lines points
+#' @method plot cve
+#' @export
+plot.cve <- function(x, ...) {
+
+    H <- x$history
+    H_1 <- H[H[, 1] > 0, 1]
+
+    defaults <- list(
+        main = "History",
+        xlab = "Iterations i",
+        ylab = expression(loss == L[n](V^{(i)})),
+        xlim = c(1, nrow(H)),
+        ylim = c(0, max(H)),
+        type = "l"
    )

-    # call CVE C++ implementation
-    return(cve_cpp(X, Y, k,
-                   nObs,
-                   tauInitial,
-                   tol,
-                   slack,
-                   maxIter,
-                   attempts
-    ))
+    call.plot <- match.call()
+    keys <- names(defaults)
+    keys <- keys[match(keys, names(call.plot)[-1], nomatch = 0) == 0]
+
+    for (key in keys) {
+        call.plot[[key]] <- defaults[[key]]
+    }
+
+    call.plot[[1L]] <- quote(plot)
+    call.plot$x <- quote(1:length(H_1))
+    call.plot$y <- quote(H_1)
+
+    eval(call.plot)
+
+    if (ncol(H) > 1) {
+        for (i in 2:ncol(H)) {
+            H_i <- H[H[, i] > 0, i]
+            lines(1:length(H_i), H_i)
+        }
+    }
+    x.ends <- apply(H, 2, function(h) { length(h[h > 0]) })
+    y.ends <- apply(H, 2, function(h) { min(h[h > 0]) })
+    points(x.ends, y.ends)
 }
--- a/CVE/R/RcppExports.R
+++ b/CVE/R/RcppExports.R
@ -1,6 +1,18 @@
 # Generated by using Rcpp::compileAttributes() -> do not edit by hand
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

+#' Gradient computation of the loss `L_n(V)`.
+#'
+#' The loss is defined as
+#' \deqn{L_n(V) := \frac{1}{n}\sum_{j=1}^n y_2(V, X_j) - y_1(V, X_j)^2}{L_n(V) := 1/n sum_j( (y_2(V, X_j) - y_1(V, X_j)^2 )}
+#' with
+#' \deqn{y_l(s_0) := \sum_{i=1}^n w_i(V, s_0)Y_i^l}{y_l(s_0) := sum_i(w_i(V, s_0) Y_i^l)}
+#'
+#' @rdname optStiefel
+#' @keywords internal
+#' @name gradient
+NULL
+
 #' Stiefel Optimization.
 #'
 #' Stiefel Optimization for \code{V} given a dataset \code{X} and responces
@ -24,8 +36,13 @@
 #'     orthogonal space spaned by \code{V}.
 #'
 #' @rdname optStiefel
-#' @name optStiefel
 #' @keywords internal
+#' @name optStiefel_simple
+NULL
+
+#' @rdname optStiefel
+#' @keywords internal
+#' @name optStiefel_linesearch
 NULL

 #' Estimated bandwidth for CVE.
@ -87,9 +104,8 @@ rStiefel <- function(p, q) {
 #'     and the matrix \code{B} estimated for the model as a orthogonal basis of the
 #'     orthogonal space spaned by \code{V}.
 #'
-#' @rdname cve_cpp_V1
-#' @export
-cve_cpp <- function(X, Y, k, nObs, tauInitial = 1., tol = 1e-5, slack = -1e-10, maxIter = 50L, attempts = 10L) {
-    .Call('_CVE_cve_cpp', PACKAGE = 'CVE', X, Y, k, nObs, tauInitial, tol, slack, maxIter, attempts)
+#' @keywords internal
+cve_cpp <- function(X, Y, method, k, nObs, tauInitial = 1., rho1 = 0.1, rho2 = 0.9, tol = 1e-5, maxIter = 50L, maxLineSearchIter = 10L, attempts = 10L) {
+    .Call('_CVE_cve_cpp', PACKAGE = 'CVE', X, Y, method, k, nObs, tauInitial, rho1, rho2, tol, maxIter, maxLineSearchIter, attempts)
 }

--- a/CVE/R/datasets.R
+++ b/CVE/R/datasets.R
@ -12,10 +12,12 @@
 #' @param lambda Only for \code{"M4"}, see: below.
 #'
 #' @return List with elements
-#' \item{X}{data}
-#' \item{Y}{response}
-#' \item{B}{Used dim-reduction matrix}
-#' \item{name}{Name of the dataset (name parameter)}
+#' \itemize{
+#'      \item{X}{data}
+#'      \item{Y}{response}
+#'      \item{B}{Used dim-reduction matrix}
+#'      \item{name}{Name of the dataset (name parameter)}
+#' }
 #'
 #' @section M1:
 #' The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace
@ -33,9 +35,9 @@
 #' @section M5:
 #' TODO:
 #'
+#' @import stats
+#' @importFrom stats rnorm rbinom
 #' @export
-#'
-#' @references Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
 dataset <- function(name = "M1", n, B, p.mix = 0.3, lambda = 1.0) {
    # validate parameters
    stopifnot(name %in% c("M1", "M2", "M3", "M4", "M5"))
--- a/CVE/R/package.R
+++ b/CVE/R/package.R
@ -5,6 +5,9 @@
 #'
 #' TODO: And some details
 #'
+#'
+#' @references Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
+#'
 #' @docType package
 #' @author Loki
 #' @import Rcpp
--- a/CVE/man/CVE-package.Rd
+++ b/CVE/man/CVE-package.Rd
@ -12,6 +12,9 @@ Conditional Variance Estimator for Sufficient Dimension
 \details{
 TODO: And some details
 }
+\references{
+Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
+}
 \author{
 Loki
 }
--- a/CVE/man/cve.Rd
+++ b/CVE/man/cve.Rd
@ -2,31 +2,45 @@
 % Please edit documentation in R/CVE.R
 \name{cve}
 \alias{cve}
-\title{Conditional Variance Estimator}
+\alias{cve.call}
+\title{Implementation of the CVE method.}
 \usage{
-cve(X, Y, k, nObs = sqrt(nrow(X)), tauInitial = 1, tol = 0.001,
-  slack = -1e-10, maxIter = 50L, attempts = 10L)
+cve(formula, data, method = "simple", ...)
+
+cve.call(X, Y, method = "simple", nObs = nrow(X)^0.5, k, ...)
 }
 \arguments{
-\item{X}{A matrix of type numeric of dimensions N times dim where N is the number
-of entries with dim as data dimension.}
+\item{formula}{Formel for the regression model defining `X`, `Y`.
+See: \code{\link{formula}}.}

-\item{Y}{A vector of type numeric of length N (coresponds to \code{x}).}
+\item{data}{data.frame holding data for formula.}

-\item{k}{Guess for rank(B).}
+\item{method}{The different only differe in the used optimization.
+ All of them are Gradient based optimization on a Stiefel manifold.
+\itemize{
+     \item "simple" Simple reduction of stepsize.
+     \item "linesearch" determines stepsize with backtracking linesearch
+         using Armijo-Wolf conditions.
+     \item TODO: further
+}}

-\item{nObs}{As describet in the paper.}
-
-\item{tol}{Tolerance for optimization stopping creteria.}
+\item{...}{Further parameters depending on the used method.
+TODO: See ...}
 }
 \description{
 Conditional Variance Estimator (CVE) is a novel sufficient dimension
- reduction (SDR) method for regressions satisfying E(Y|X) = E(Y|B'X),
- where B'X is a lower dimensional projection of the predictors.
+reduction (SDR) method assuming a model
+\deqn{Y \sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon}
+where B'X is a lower dimensional projection of the predictors.
+}
+\examples{
+library(CVE)
+ds <- dataset("M5")
+X <- ds$X
+Y <- ds$Y
+dr <- cve(Y ~ X, k = 1)
+
 }
 \references{
-Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
-}
-\seealso{
-TODO: \code{vignette("CVE_paper", package="CVE")}.
+Fertl L, Bura E. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
 }
--- a/CVE/man/cve_cpp_V1.Rd
+++ b/CVE/man/cve_cpp_V1.Rd
@ -4,8 +4,9 @@
 \alias{cve_cpp}
 \title{Conditional Variance Estimation (CVE) method.}
 \usage{
-cve_cpp(X, Y, k, nObs, tauInitial = 1, tol = 1e-05, slack = -1e-10,
-  maxIter = 50L, attempts = 10L)
+cve_cpp(X, Y, method, k, nObs, tauInitial = 1, rho1 = 0.1,
+  rho2 = 0.9, tol = 1e-05, maxIter = 50L, maxLineSearchIter = 10L,
+  attempts = 10L)
 }
 \arguments{
 \item{X}{data points}
@ -19,13 +20,13 @@ cve_cpp(X, Y, k, nObs, tauInitial = 1, tol = 1e-05, slack = -1e-10,

 \item{tol}{Tolerance for update error used for stopping criterion (default 1e-5)}

-\item{slack}{Ratio of small negative error allowed in loss optimization (default -1e-10)}
-
 \item{maxIter}{Upper bound of optimization iterations (default 50)}

 \item{attempts}{Number of tryes with new random optimization starting points (default 10)}

 \item{tau}{Initial step size (default 1)}
+
+\item{slack}{Ratio of small negative error allowed in loss optimization (default -1e-10)}
 }
 \value{
 List containing the bandwidth \code{h}, optimization objective \code{V}
@ -35,3 +36,4 @@ List containing the bandwidth \code{h}, optimization objective \code{V}
 \description{
 This version uses a "simple" stiefel optimization schema.
 }
+\keyword{internal}
--- a/CVE/man/dataset.Rd
+++ b/CVE/man/dataset.Rd
@ -19,10 +19,12 @@ dataset(name = "M1", n, B, p.mix = 0.3, lambda = 1)
 }
 \value{
 List with elements
-\item{X}{data}
-\item{Y}{response}
-\item{B}{Used dim-reduction matrix}
-\item{name}{Name of the dataset (name parameter)}
+\itemize{
+     \item{X}{data}
+     \item{Y}{response}
+     \item{B}{Used dim-reduction matrix}
+     \item{name}{Name of the dataset (name parameter)}
+}
 }
 \description{
 Provides sample datasets. There are 5 different datasets named
@ -60,6 +62,3 @@ TODO:
 TODO:
 }

-\references{
-Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
-}
--- a/CVE/man/optStiefel.Rd
+++ b/CVE/man/optStiefel.Rd
@ -1,8 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/RcppExports.R
-\name{optStiefel}
-\alias{optStiefel}
-\title{Stiefel Optimization.}
+\name{gradient}
+\alias{gradient}
+\alias{optStiefel_simple}
+\alias{optStiefel_linesearch}
+\title{Gradient computation of the loss `L_n(V)`.}
 \arguments{
 \item{X}{data points}

@ -27,6 +29,11 @@ List containing the bandwidth \code{h}, optimization objective \code{V}
    orthogonal space spaned by \code{V}.
 }
 \description{
+The loss is defined as
+\deqn{L_n(V) := \frac{1}{n}\sum_{j=1}^n y_2(V, X_j) - y_1(V, X_j)^2}{L_n(V) := 1/n sum_j( (y_2(V, X_j) - y_1(V, X_j)^2 )}
+with
+\deqn{y_l(s_0) := \sum_{i=1}^n w_i(V, s_0)Y_i^l}{y_l(s_0) := sum_i(w_i(V, s_0) Y_i^l)}
+
 Stiefel Optimization for \code{V} given a dataset \code{X} and responces
 \code{Y} for the model \deqn{Y\sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon}
 to compute the matrix `B` such that \eqn{span{B} = span(V)^{\bot}}{%
--- a/CVE/man/plot.cve.Rd
+++ b/CVE/man/plot.cve.Rd
@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/CVE.R
+\name{plot.cve}
+\alias{plot.cve}
+\title{Ploting helper for objects of class \code{"cve"}.}
+\usage{
+\method{plot}{cve}(x, ...)
+}
+\arguments{
+\item{x}{Object of class \code{"cve"} (result of [cve()]).}
+
+\item{...}{Pass through parameters to [plot()] and [lines()]}
+
+\item{content}{Specifies what to plot:
+\itemize{
+     \item "history" Plots the loss history from stiefel optimization.
+     \item ... TODO: add (if there are any)
+}}
+}
+\description{
+Ploting helper for objects of class \code{"cve"}.
+}
+\seealso{
+see \code{\link{par}} for graphical parameters to pass through.
+}
--- a/CVE/src/CVE.cpp
+++ b/CVE/src/CVE.cpp
@ -1,8 +1,3 @@
-//
-// Usage:
-//  ~$ R -e "library(Rcpp); Rcpp::sourceCpp('cve_V1.cpp')"
-//
-
 // only `RcppArmadillo.h` which includes `Rcpp.h`
 #include <RcppArmadillo.h>

@ -14,6 +9,9 @@
 // required for `R::qchisq()` used in `estimateBandwidth()`
 #include <Rmath.h>

+// for proper error handling
+#include <stdexcept>
+
 //' Estimated bandwidth for CVE.
 //'
 //' Estimates a propper bandwidth \code{h} according
@ -72,18 +70,27 @@ arma::mat rStiefel(arma::uword p, arma::uword q) {
    return Q;
 }

+//' Gradient computation of the loss `L_n(V)`.
+//'
+//' The loss is defined as
+//' \deqn{L_n(V) := \frac{1}{n}\sum_{j=1}^n y_2(V, X_j) - y_1(V, X_j)^2}{L_n(V) := 1/n sum_j( (y_2(V, X_j) - y_1(V, X_j)^2 )}
+//' with
+//' \deqn{y_l(s_0) := \sum_{i=1}^n w_i(V, s_0)Y_i^l}{y_l(s_0) := sum_i(w_i(V, s_0) Y_i^l)}
+//'
+//' @rdname optStiefel
+//' @keywords internal
+//' @name gradient
 double gradient(const arma::mat& X,
                const arma::mat& X_diff,
                const arma::mat& Y,
                const arma::mat& Y_rep,
                const arma::mat& V,
                const double h,
-                arma::mat* G = 0
+                arma::mat* G = 0 // out
 ) {
    using namespace arma;

    uword n = X.n_rows;
-    uword p = X.n_cols;

    // orthogonal projection matrix `Q = I - VV'` for dist computation
    mat Q = -(V * V.t());
@ -138,18 +145,17 @@ double gradient(const arma::mat& X,
 //'     orthogonal space spaned by \code{V}.
 //'
 //' @rdname optStiefel
-//' @name optStiefel
 //' @keywords internal
-double optStiefel(
+//' @name optStiefel_simple
+double optStiefel_simple(
        const arma::mat& X,
        const arma::vec& Y,
        const int k,
        const double h,
        const double tauInitial,
        const double tol,
-        const double slack,
        const int maxIter,
-        arma::mat& V, // out
+        arma::mat& V,      // out
        arma::vec& history // out
 ) {
    using namespace arma;
@ -177,9 +183,9 @@ double optStiefel(
    double loss;
    double error = datum::inf;
    double tau = tauInitial;
-    int count;
+    int iter;
    // main optimization loop
-    for (count = 0; count < maxIter && error > tol; ++count) {
+    for (iter = 0; iter < maxIter && error > tol; ++iter) {
        // calc gradient `G = grad_V(L)(V)`
        mat G;
        loss = gradient(X, X_diff, Y, Y_rep, V, h, &G);
@ -190,11 +196,11 @@ double optStiefel(
        // loss after step `L(V(tau))`
        double loss_tau = gradient(X, X_diff, Y, Y_rep, V_tau, h);

-        // store `loss` in `history` and increase `count`
-        history(count) = loss;
+        // store `loss` in `history` and increase `iter`
+        history(iter) = loss;

        // validate if loss decreased
-        if ((loss_tau - loss) > slack * loss) {
+        if ((loss_tau - loss) > 0.0) {
            // ignore step, retry with half the step size
            tau = tau / 2.;
            error = datum::inf;
@ -208,7 +214,121 @@ double optStiefel(
    }

    // store final `loss`
-    history(count) = loss;
+    history(iter) = loss;
+
+    return loss;
+}
+
+//' @rdname optStiefel
+//' @keywords internal
+//' @name optStiefel_linesearch
+double optStiefel_linesearch(
+        const arma::mat& X,
+        const arma::vec& Y,
+        const int k,
+        const double h,
+        const double tauInitial,
+        const double tol,
+        const int maxIter,
+        const double rho1,
+        const double rho2,
+        const int maxLineSearchIter,
+        arma::mat& V,       // out
+        arma::vec& history  // out
+) {
+    using namespace arma;
+
+    // get dimensions
+    const uword n = X.n_rows; // nr samples
+    const uword p = X.n_cols; // dim of random variable `X`
+    const uword q = p - k;    // rank(V) e.g. dim of ortho space of span{B}
+
+    // all `X_i - X_j` differences, `X_diff.row(i * n + j) = X_i - X_j`
+    mat X_diff(n * n, p);
+    for (uword i = 0, k = 0; i < n; ++i) {
+        for (uword j = 0; j < n; ++j) {
+            X_diff.row(k++) = X.row(i) - X.row(j);
+        }
+    }
+    const vec Y_rep = repmat(Y, n, 1);
+    const mat I_p = eye<mat>(p, p);
+    const mat I_2q = eye<mat>(2 * q, 2 * q);
+
+    // initial start value for `V`
+    V = rStiefel(p, q);
+
+    // first gradient initialization
+    mat G;
+    double loss = gradient(X, X_diff, Y, Y_rep, V, h, &G);
+
+    // set first `loss` in history
+    history(0) = loss;
+
+    // main curvilinear optimization loop
+    double error = datum::inf;
+    int iter = 0;
+    while (iter++ < maxIter && error > tol) {
+        // helper matrices `lU` (linesearch U), `lV` (linesearch V)
+        // as describet in [Wen, Yin] Lemma 4.
+        mat lU = join_rows(G, V); // linesearch "U"
+        mat lV = join_rows(V, -1.0 * G); // linesearch "V"
+        // `A = G V' - V G'`
+        mat A = lU * lV.t();
+
+        // set initial step size for curvilinear line search
+        double tau = tauInitial, lower = 0., upper = datum::inf;
+
+        // TODO: check if `tau` is valid for inverting
+
+        // set line search internal gradient and loss to cycle for next iteration
+        mat V_tau; // next position after a step of size `tau`, a.k.a. `V(tau)`
+        mat G_tau; // gradient of `V` at `V(tau) = V_tau`
+        double loss_tau; // loss (objective) at position `V(tau)`
+        int lsIter = 0; // linesearch iter
+        // start line search
+        do {
+            mat HV = inv(I_2q + (tau/2.) * lV.t() * lU) * lV.t();
+            // next step `V`
+            V_tau = V - tau * (lU * (HV * V));
+
+            double LprimeV = -trace(G.t() * A * V);
+
+            mat lB = I_p - (tau / 2.) * lU * HV;
+
+            loss_tau = gradient(X, X_diff, Y, Y_rep, V_tau, h, &G_tau);
+
+            double LprimeV_tau = -2. * trace(G_tau.t() * lB * A * (V + V_tau));
+
+            // Armijo condition
+            if (loss_tau > loss + (rho1 * tau * LprimeV)) {
+                upper = tau;
+                tau = (lower + upper) / 2.;
+            // Wolfe condition
+            } else if (LprimeV_tau < rho2 * LprimeV) {
+                lower = tau;
+                if (upper == datum::inf) {
+                    tau = 2. * lower;
+                } else {
+                    tau = (lower + upper) / 2.;
+                }
+            } else {
+                break;
+            }
+        } while (++lsIter < maxLineSearchIter);
+
+        // compute error (break condition)
+        // Note: `error` is the decrease of the objective `L_n(V)` and not the
+        //       norm of the gradient as proposed in [Wen, Yin] Algorithm 1.
+        error = loss - loss_tau;
+
+        // cycle `V`, `G` and `loss` for next iteration
+        V = V_tau;
+        loss = loss_tau;
+        G = G_tau;
+
+        // store final `loss`
+        history(iter) = loss;
+    }

    return loss;
 }
@ -232,18 +352,20 @@ double optStiefel(
 //'     and the matrix \code{B} estimated for the model as a orthogonal basis of the
 //'     orthogonal space spaned by \code{V}.
 //'
-//' @rdname cve_cpp_V1
-//' @export
+//' @keywords internal
 // [[Rcpp::export]]
 Rcpp::List cve_cpp(
        const arma::mat& X,
        const arma::vec& Y,
+        const std::string method,
        const int k,
        const double nObs,
        const double tauInitial = 1.,
+        const double rho1 = 0.1,
+        const double rho2 = 0.9,
        const double tol = 1e-5,
-        const double slack = -1e-10,
        const int maxIter = 50,
+        const int maxLineSearchIter = 10,
        const int attempts = 10
 ) {
    using namespace arma;
@ -263,12 +385,27 @@ Rcpp::List cve_cpp(
        // declare output variables
        mat V; // estimated `V` space
        vec hist = vec(history.n_rows, fill::zeros); // optimization history
-        double loss = optStiefel(X, Y, k, h, tauInitial, tol, slack, maxIter, V, hist);
+        double loss;
+        if (method == "simple") {
+            loss = optStiefel_simple(
+                X, Y, k, h,
+                tauInitial, tol, maxIter,
+                V, hist
+            );
+        } else if (method == "linesearch") {
+            loss = optStiefel_linesearch(
+                X, Y, k, h,
+                tauInitial, tol, maxIter, rho1, rho2, maxLineSearchIter,
+                V, hist
+            );
+        } else {
+            throw std::invalid_argument("Unknown method.");
+        }
        if (loss < loss_best) {
            loss_best = loss;
            V_best = V;
        }
-        // write history to history collection
+        // add history to history collection
        history.col(i) = hist;
    }

--- a/CVE/src/RcppExports.cpp
+++ b/CVE/src/RcppExports.cpp
@ -32,21 +32,24 @@ BEGIN_RCPP
 END_RCPP
 }
 // cve_cpp
-Rcpp::List cve_cpp(const arma::mat& X, const arma::vec& Y, const int k, const double nObs, const double tauInitial, const double tol, const double slack, const int maxIter, const int attempts);
-RcppExport SEXP _CVE_cve_cpp(SEXP XSEXP, SEXP YSEXP, SEXP kSEXP, SEXP nObsSEXP, SEXP tauInitialSEXP, SEXP tolSEXP, SEXP slackSEXP, SEXP maxIterSEXP, SEXP attemptsSEXP) {
+Rcpp::List cve_cpp(const arma::mat& X, const arma::vec& Y, const std::string method, const int k, const double nObs, const double tauInitial, const double rho1, const double rho2, const double tol, const int maxIter, const int maxLineSearchIter, const int attempts);
+RcppExport SEXP _CVE_cve_cpp(SEXP XSEXP, SEXP YSEXP, SEXP methodSEXP, SEXP kSEXP, SEXP nObsSEXP, SEXP tauInitialSEXP, SEXP rho1SEXP, SEXP rho2SEXP, SEXP tolSEXP, SEXP maxIterSEXP, SEXP maxLineSearchIterSEXP, SEXP attemptsSEXP) {
 BEGIN_RCPP
    Rcpp::RObject rcpp_result_gen;
    Rcpp::RNGScope rcpp_rngScope_gen;
    Rcpp::traits::input_parameter< const arma::mat& >::type X(XSEXP);
    Rcpp::traits::input_parameter< const arma::vec& >::type Y(YSEXP);
+    Rcpp::traits::input_parameter< const std::string >::type method(methodSEXP);
    Rcpp::traits::input_parameter< const int >::type k(kSEXP);
    Rcpp::traits::input_parameter< const double >::type nObs(nObsSEXP);
    Rcpp::traits::input_parameter< const double >::type tauInitial(tauInitialSEXP);
+    Rcpp::traits::input_parameter< const double >::type rho1(rho1SEXP);
+    Rcpp::traits::input_parameter< const double >::type rho2(rho2SEXP);
    Rcpp::traits::input_parameter< const double >::type tol(tolSEXP);
-    Rcpp::traits::input_parameter< const double >::type slack(slackSEXP);
    Rcpp::traits::input_parameter< const int >::type maxIter(maxIterSEXP);
+    Rcpp::traits::input_parameter< const int >::type maxLineSearchIter(maxLineSearchIterSEXP);
    Rcpp::traits::input_parameter< const int >::type attempts(attemptsSEXP);
-    rcpp_result_gen = Rcpp::wrap(cve_cpp(X, Y, k, nObs, tauInitial, tol, slack, maxIter, attempts));
+    rcpp_result_gen = Rcpp::wrap(cve_cpp(X, Y, method, k, nObs, tauInitial, rho1, rho2, tol, maxIter, maxLineSearchIter, attempts));
    return rcpp_result_gen;
 END_RCPP
 }
@ -54,7 +57,7 @@ END_RCPP
 static const R_CallMethodDef CallEntries[] = {
    {"_CVE_estimateBandwidth", (DL_FUNC) &_CVE_estimateBandwidth, 3},
    {"_CVE_rStiefel", (DL_FUNC) &_CVE_rStiefel, 2},
-    {"_CVE_cve_cpp", (DL_FUNC) &_CVE_cve_cpp, 9},
+    {"_CVE_cve_cpp", (DL_FUNC) &_CVE_cve_cpp, 12},
    {NULL, NULL, 0}
 };

--- a/README.md
+++ b/README.md
@ -0,0 +1,7 @@
+
+# Overview
+- **CVE/**: Contains actual `R` package.
+- **CVE_legacy/**: Contains original (first) `R` implementatin of the CVE method.
+The `*.R` and `*.cpp` files in the root directory are _development_ and _test_ files.
+
+## TODO: README.md
--- a/cve_V1.cpp
+++ b/cve_V1.cpp
@ -1,5 +1,5 @@
 //
-// Standalone implementation for development.
+// Development file.
 //
 // Usage:
 //  ~$ R -e "library(Rcpp); Rcpp::sourceCpp('cve_V1.cpp')"
@ -74,6 +74,15 @@ arma::mat rStiefel(arma::uword p, arma::uword q) {
    return Q;
 }

+//' Gradient computation of the loss `L_n(V)`.
+//'
+//' The loss is defined as
+//' \deqn{L_n(V) := \frac{1}{n}\sum_{j=1}^n (y_2(V, X_j) - y_1(V, X_j)^2)}
+//' with
+//' \deqn{y_l(s_0) := \sum_{i=1}^n w_i(V, s_0)Y_i^l}{y_l(s_0) := sum_i(w_i(V, s_0) Y_i^l)}
+//'
+//' @rdname optStiefel
+//' @keywords internal
 double gradient(const arma::mat& X,
                const arma::mat& X_diff,
                const arma::mat& Y,
@ -178,9 +187,9 @@ double optStiefel(
    double loss;
    double error = datum::inf;
    double tau = tauInitial;
-    int count;
+    int iter;
    // main optimization loop
-    for (count = 0; count < maxIter && error > tol; ++count) {
+    for (iter = 0; iter < maxIter && error > tol; ++iter) {
        // calc gradient `G = grad_V(L)(V)`
        mat G;
        loss = gradient(X, X_diff, Y, Y_rep, V, h, &G);
@ -191,8 +200,8 @@ double optStiefel(
        // loss after step `L(V(tau))`
        double loss_tau = gradient(X, X_diff, Y, Y_rep, V_tau, h);

-        // store `loss` in `history` and increase `count`
-        history(count) = loss;
+        // store `loss` in `history` and increase `iter`
+        history(iter) = loss;

        // validate if loss decreased
        if ((loss_tau - loss) > slack * loss) {
@ -209,7 +218,7 @@ double optStiefel(
    }

    // store final `loss`
-    history(count) = loss;
+    history(iter) = loss;

    return loss;
 }
@ -233,7 +242,7 @@ double optStiefel(
 //'     and the matrix \code{B} estimated for the model as a orthogonal basis of the
 //'     orthogonal space spaned by \code{V}.
 //'
-//' @rdname cve_cpp_V1
+//' @rdname cve_cpp
 //' @export
 // [[Rcpp::export]]
 Rcpp::List cve_cpp(
--- a/cve_V2.cpp
+++ b/cve_V2.cpp
@ -1,5 +1,5 @@
 //
-// Standalone implementation for development.
+// Development file.
 //
 // Usage:
 //  ~$ R -e "library(Rcpp); Rcpp::sourceCpp('cve_V2.cpp')"
@ -149,12 +149,12 @@ double optStiefel(
        const int k,
        const double h,
        const double tauInitial,
-        const double rho1,
-        const double rho2,
        const double tol,
        const int maxIter,
+        const double rho1,
+        const double rho2,
        const int maxLineSeachIter,
-        arma::mat& V, // out
+        arma::mat& V,      // out
        arma::vec& history // out
 ) {
    using namespace arma;
@ -309,7 +309,7 @@ Rcpp::List cve_cpp(
        mat V; // estimated `V` space
        vec hist = vec(history.n_rows, fill::zeros); // optimization history
        double loss = optStiefel(X, Y, k, h,
-            tauInitial, rho1, rho2, tol, maxIter, maxLineSeachIter, V, hist
+            tauInitial, tol, maxIter, rho1, rho2, maxLineSeachIter, V, hist
        );
        if (loss < loss_best) {
            loss_best = loss;
--- a/runtime_tests_grad.cpp
+++ b/runtime_tests_grad.cpp
@ -252,27 +252,49 @@ arma::mat grad_p(const arma::mat& X_ref,
    loss /= n;

    // scaling for gradient summation
-    for (uword k = 0; k < n; ++k) {
-        for (uword l = 0; l < n; ++l) {
+    // this scaling matrix is the lower triangular matrix defined as
+    //
+    // S_kl := (s_{kl} + s_{lk}) D_{kl}
+    // s_ij := (L_n(V, X_j) - (Y_i - y1(V, X_j))^2) W_ij
+    double factor;
+    for (uword l = 0; l < n; ++l) {
+        for (uword k = l + 1; k < n; ++k) {
            tmp = Y[k] - y1[l];
-            S[l * n + k] = (L[l] - (tmp * tmp)) * W[l * n + k] * D[l * n + k];
+            factor = (L[l] - (tmp * tmp)) * W[l * n + k]; // \tile{S}_{kl}
+            tmp = Y[l] - y1[k];
+            factor += (L[k] - (tmp * tmp)) * W[k * n + l]; // \tile{S}_{lk}
+            S[l * n + k] = factor * D[l * n + k]; // (s_kl + s_lk) * D_kl
        }
    }

    // gradient
-    double factor = -2. / (n * h * h);
-    for (uword j = 0; j < q; ++j) {
+    // reuse memory area of `Q`
+    // no longer needed and provides enough space (`q < p`)
+    double* GD = Q;
+    const double* X_l;
+    const double* X_k;
+    for (uword j = 0; j < p; ++j) {
        for (uword i = 0; i < p; ++i) {
            sum = 0.0;
-            for (uword k = 0; k < n; ++k) {
-                for (uword l = 0; l < n; ++l) {
-                    mvm = 0.0;
-                    for (uword m = 0; m < p; ++m) {
-                        mvm += (X[l * p + m] - X[k * p + m]) * V[j * p + m];
-                    }
-                    sum += S[l * n + k] * (X[l * p + i] - X[k * p + i]) * mvm;
+            for (uword l = 0; l < n; ++l) {
+                X_l = X + (l * p);
+                for (uword k = l + 1; k < n; ++k) {
+                    X_k = X + (k * p);
+                    sum += S[l * n + k] * (X_l[i] - X_k[i]) * (X_l[j] - X_k[j]);
                }
            }
+            GD[j * p + i] = sum;
+        }
+    }
+
+    // distance gradient `DG` to gradient by multiplying with `V`
+    factor = -2. / (n * h * h);
+    for (uword i = 0; i < p; ++i) {
+        for (uword j = 0; j < q; ++j) {
+            sum = 0.0;
+            for (uword k = 0; k < p; ++k) {
+                sum += GD[k * p + i] * V[j * p + k];
+            }
            G[j * p + i] = factor * sum;
        }
    }
@ -352,16 +374,17 @@ setup.tests <- function () {
    }
    counter <<- counter + 1
 }
-(mbm <- microbenchmark(
+mbm <- microbenchmark(
    arma = arma_grad(X, X_diff, Y, Y_rep, V, h),
    grad = grad(Xt, Y, V, h),
    grad_p = grad_p(Xt, Y, V, h),
    check = comp.all,
    setup = setup.tests(),
    times = 100L
-))
-
-cat("Total time:", format(Sys.time() - time.start), '\n')
+)
+cat("\033[1m\033[92mTotal time:", format(Sys.time() - time.start), '\n')
+print(mbm)
+cat("\033[0m")

 boxplot(mbm, las = 2, xlab = NULL)