wip: cleanup

commit 6f46cb2eaf
parent 03153ef05e
@@ -134,8 +134,8 @@ cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5,
     }

     # augment result information
-    dr.k$method <- method
-    dr.k$call <- call
+    dr$method <- method
+    dr$call <- call
     class(dr) <- "cve"
     return(dr)
 }
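Illustrative aside (hedged sketch, not code from this repository): the lines above attach metadata to whatever result list the optimizer produced, so the caller receives an object of class "cve". A made-up stand-in `dr` makes the effect visible:

# `dr` stands in for the optimization result assembled earlier in cve.call();
# the field names mirror the diff above, the values are invented.
dr <- list(loss = 0.42, V = qr.Q(qr(matrix(rnorm(12), 4, 3))))
dr$method <- "simple"                                # augment result information
dr$call <- quote(cve.call(X, Y, method = "simple"))
class(dr) <- "cve"
str(dr)                                              # loss, V, method, call; class "cve"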
@@ -1,2 +1,2 @@
 runtime_test    Runtime comparison of CVE against MAVE for M1 - M5 datasets.
 logging         Example of a logger function for cve algorithm analysis.
cve_V0.R (deleted, 173 lines)
@@ -1,173 +0,0 @@

#' Squared Euclidean vector norm (sum of squared entries)
#'
#' @param x Numeric vector
#' @return Numeric
norm2 <- function(x) { return(sum(x^2)) }

#' Samples uniform from the Stiefel Manifold
#'
#' @param p row dim.
#' @param q col dim.
#' @return `(p, q)` semi-orthogonal matrix
rStiefl <- function(p, q) {
    return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))))
}

#' Matrix Trace
#'
#' @param M Square matrix
#' @return Trace \eqn{Tr(M)}
Tr <- function(M) {
    return(sum(diag(M)))
}

#' Null space basis of given matrix `M`
#'
#' @param M `(p, q)` matrix
#' @return Semi-orthogonal `(p, p - q)` matrix `Q` spanning the null space of `M`
null <- function(M) {
    tmp <- qr(M)
    set <- if(tmp$rank == 0L) seq_len(ncol(M)) else -seq_len(tmp$rank)
    return(qr.Q(tmp, complete = TRUE)[, set, drop = FALSE])
}

####
# chooses bandwidth h according to the formula in the paper
# dim ... dimension of X vector
# k ... row dim of V (dim times q matrix) corresponding to a basis of the orthogonal complement of B in the model
# N ... sample size
# nObs ... nObs in bandwidth formula
# tr ... trace of sample covariance matrix of X
estimateBandwidth <- function(X, k, nObs) {
    n <- nrow(X)
    p <- ncol(X)

    X_centered <- scale(X, center = TRUE, scale = FALSE)
    Sigma <- (1 / n) * t(X_centered) %*% X_centered

    quantil <- qchisq((nObs - 1) / (n - 1), k)
    return(2 * quantil * Tr(Sigma) / p)
}

###########
# evaluates L(V) and returns L_n(V), (L_tilde_n(V,X_i))_{i=1,..,n} and grad_V L_n(V) (p times k)
# V ... (dim times q) matrix
# Xl ... output of Xl_fun
# dtemp ... vector with pairwise distances |X_i - X_j|
# q ... output of q_ind function
# Y ... vector with N Y_i values
# if grad = TRUE, the gradient of L(V) is also returned
LV <- function(V, Xl, dtemp, h, q, Y, grad = TRUE) {
    N <- length(Y)
    k <- if (is.vector(V)) { 1 } else { ncol(V) }
    Xlv <- Xl %*% V
    d <- dtemp - ((Xlv^2) %*% rep(1, k))
    w <- dnorm(d / h) / dnorm(0)
    w <- matrix(w, N, q)
    w <- apply(w, 2, function(x) { x / sum(x) })
    y1 <- t(w) %*% Y
    y2 <- t(w) %*% (Y^2)
    sig <- y2 - y1^2

    result <- list(var = mean(sig), sig = sig)
    if (grad == TRUE) {
        tmp1 <- (kronecker(sig, rep(1, N)) - (as.vector(kronecker(rep(1, q), Y)) - kronecker(y1, rep(1, N)))^2)
        if (k == 1) {
            grad_d <- -2 * Xl * as.vector(Xlv)
            grad <- (1 / h^2) * (1 / q) * t(grad_d * as.vector(d) * as.vector(w)) %*% tmp1
        } else {
            grad <- matrix(0, nrow(V), ncol(V))
            for (j in 1:k) {
                grad_d <- -2 * Xl * as.vector(Xlv[ ,j])
                grad[ ,j] <- (1 / h^2) * (1 / q) * t(grad_d * as.vector(d) * as.vector(w)) %*% tmp1
            }
        }
        result$grad = grad
    }

    return(result)
}
#### performs Stiefel optimization of argmin_{V : V'V=I_k} L_n(V)
# through curvilinear search with k0 starting values drawn uniformly on the Stiefel manifold
# dat ... (N times dim+1) matrix with first column corresponding to Y values, the other columns
#         consist of the X data matrix (i.e. dat = cbind(Y, X))
# h ... bandwidth
# k ... row dimension of V that is calculated, corresponds to the dimension of the orthogonal complement of B
# k0 ... number of arbitrary starting values
# p ... fraction of data points used as shifting point
# maxIter ... number of maximal iterations in curvilinear search
# nObs ... nObs parameter for choosing the bandwidth if no h is supplied
# lambda_0 ... initial stepsize
# tol ... tolerance for stopping iterations
# sclack_para ... if the relative improvement is worse than sclack_para the stepsize is reduced
# output:
# est_base ... Vhat_k = argmin_{V : V'V=I_k} L_n(V), a (dim times k) matrix where dim is the row dimension of the X data matrix
# var ... value of L_n(Vhat_k)
# aov_dat ... (L_tilde_n(Vhat_k, X_i))_{i=1,..,N}
# count ... number of iterations
# h ... bandwidth
cve_R <- function(
    X, Y, k,
    nObs = sqrt(nrow(X)),
    tauInitial = 1.0,
    tol = 1e-3,
    slack = 0,
    maxIter = 50L,
    attempts = 10L
) {
    # get dimensions
    n <- nrow(X)
    p <- ncol(X)
    q <- p - k

    Xl <- kronecker(rep(1, n), X) - kronecker(X, rep(1, n))
    Xd <- apply(Xl, 1, norm2)
    I_p <- diag(1, p)
    # estimate bandwidth
    h <- estimateBandwidth(X, k, nObs)

    Lbest <- Inf
    Vend <- mat.or.vec(p, q)

    for (. in 1:attempts) {
        Vnew <- Vold <- rStiefl(p, q)
        Lnew <- Lold <- exp(10000)
        tau <- tauInitial

        error <- Inf
        count <- 0
        while (error > tol & count < maxIter) {

            tmp <- LV(Vold, Xl, Xd, h, n, Y)
            G <- tmp$grad
            Lold <- tmp$var

            W <- tau * (G %*% t(Vold) - Vold %*% t(G))
            Vnew <- solve(I_p + W) %*% (I_p - W) %*% Vold

            Lnew <- LV(Vnew, Xl, Xd, h, n, Y, grad = FALSE)$var

            if ((Lnew - Lold) > slack * Lold) {
                tau = tau / 2
                error <- Inf
            } else {
                error <- norm(Vold %*% t(Vold) - Vnew %*% t(Vnew), "F") / sqrt(2 * k)
                Vold <- Vnew
            }
            count <- count + 1
        }

        if (Lbest > Lnew) {
            Lbest <- Lnew
            Vend <- Vnew
        }
    }

    return(list(
        loss = Lbest,
        V = Vend,
        B = null(Vend),
        h = h
    ))
}
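Illustrative aside (hedged, not part of the repository): the bandwidth rule implemented by estimateBandwidth above can be replayed in a few lines of plain R on synthetic data; every number below is made up.

set.seed(1)
n <- 100; p <- 5; k <- 3                  # k = dim of the orthogonal complement
X <- matrix(rnorm(n * p), n, p)
nObs <- sqrt(n)
Xc <- scale(X, center = TRUE, scale = FALSE)
Sigma <- crossprod(Xc) / n                # sample covariance, as in the code above
h <- 2 * qchisq((nObs - 1) / (n - 1), k) * sum(diag(Sigma)) / p
h                                         # the bandwidth cve_R would use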
cve_V1.cpp (deleted, 330 lines)
@@ -1,330 +0,0 @@
//
// Development file.
//
// Usage:
// ~$ R -e "library(Rcpp); Rcpp::sourceCpp('cve_V1.cpp')"
//

// only `RcppArmadillo.h` which includes `Rcpp.h`
#include <RcppArmadillo.h>

// through the depends attribute `Rcpp` is told to create
// hooks for `RcppArmadillo` needed by the build process.
//
// [[Rcpp::depends(RcppArmadillo)]]

// required for `R::qchisq()` used in `estimateBandwidth()`
#include <Rmath.h>

//' Estimated bandwidth for CVE.
//'
//' Estimates a proper bandwidth \code{h} according to
//' \deqn{h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{%
//' h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p}
//'
//' @param X data matrix of dimension (n x p) with n data points X_i of dimension
//' p. Therefore each row represents a data point of dimension p.
//' @param k Guess for rank(B).
//' @param nObs Either numeric or a function. If specified as a numeric value
//' it is used in the computation of the bandwidth directly. If it is a function,
//' `nObs` is evaluated as \code{nObs(nrow(x))}. The default behaviour if not
//' supplied at all is to use \code{nObs <- nrow(x)^0.5}.
//'
//' @seealso [qchisq()]
//'
//' @export
// [[Rcpp::export]]
double estimateBandwidth(const arma::mat& X, arma::uword k, double nObs) {
    using namespace arma;

    uword n = X.n_rows; // nr samples
    uword p = X.n_cols; // dimension of rand. var. `X`

    // column mean
    mat M = mean(X);
    // center `X`
    mat C = X.each_row() - M;
    // trace of covariance matrix, `traceSigma = Tr(C' C)`
    double traceSigma = accu(C % C);
    // compute estimated bandwidth
    double qchi2 = R::qchisq((nObs - 1.0) / (n - 1), static_cast<double>(k), 1, 0);

    return 2.0 * qchi2 * traceSigma / (p * n);
}

//' Random element from Stiefel Manifold `S(p, q)`.
//'
//' Draws an element of \eqn{S(p, q)} which is the Stiefel Manifold.
//' This is done by taking the Q-component of the QR decomposition
//' from a `(p, q)` Matrix with independent standard normal entries.
//' As a semi-orthogonal Matrix the result `V` satisfies \eqn{V'V = I_q}.
//'
//' @param p Row dimension
//' @param q Column dimension
//'
//' @return Matrix of dim `(p, q)`.
//'
//' @seealso <https://en.wikipedia.org/wiki/Stiefel_manifold>
//'
//' @export
// [[Rcpp::export]]
arma::mat rStiefel(arma::uword p, arma::uword q) {
    arma::mat Q, R;
    arma::qr_econ(Q, R, arma::randn<arma::mat>(p, q));
    return Q;
}

//' Gradient computation of the loss `L_n(V)`.
//'
//' The loss is defined as
//' \deqn{L_n(V) := \frac{1}{n}\sum_{j=1}^n (y_2(V, X_j) - y_1(V, X_j)^2)}
//' with
//' \deqn{y_l(s_0) := \sum_{i=1}^n w_i(V, s_0)Y_i^l}{y_l(s_0) := sum_i(w_i(V, s_0) Y_i^l)}
//'
//' @rdname optStiefel
//' @keywords internal
double gradient(const arma::mat& X,
                const arma::mat& X_diff,
                const arma::mat& Y,
                const arma::mat& Y_rep,
                const arma::mat& V,
                const double h,
                arma::mat* G = 0
) {
    using namespace arma;

    uword n = X.n_rows;
    uword p = X.n_cols;

    // orthogonal projection matrix `Q = I - VV'` for dist computation
    mat Q = -(V * V.t());
    Q.diag() += 1;
    // calc pairwise distances as `D` with `D(i, j) = d_i(V, X_j)`
    vec D_vec = sum(square(X_diff * Q), 1);
    mat D = reshape(D_vec, n, n);
    // calc weights as `W` with `W(i, j) = w_i(V, X_j)`
    mat W = exp(D / (-2.0 * h));
    // column-wise normalization via 1-norm
    W = normalise(W, 1);
    vec W_vec = vectorise(W);
    // weighted `Y` means (first and second order)
    vec y1 = W.t() * Y;
    vec y2 = W.t() * square(Y);
    // loss for each `X_i`, meaning `L(i) = L_n(V, X_i)`
    vec L = y2 - square(y1);
    // `loss = L_n(V)`
    double loss = mean(L);
    // check if gradient as output variable is set
    if (G != 0) {
        // `G = grad(L_n(V))` a.k.a. gradient of `L` with respect to `V`
        vec scale = (repelem(L, n, 1) - square(Y_rep - repelem(y1, n, 1))) % W_vec % D_vec;
        mat X_diff_scale = X_diff.each_col() % scale;
        (*G) = X_diff_scale.t() * X_diff * V;
        (*G) *= -2.0 / (h * h * n);
    }

    return loss;
}

//' Stiefel Optimization.
//'
//' Stiefel Optimization for \code{V} given a dataset \code{X} and responses
//' \code{Y} for the model \deqn{Y\sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon}
//' to compute the matrix `B` such that \eqn{span(B) = span(V)^{\bot}}{%
//' span(B) = orth(span(V))}.
//'
//' @param X data points
//' @param Y response
//' @param k assumed \eqn{rank(B)}
//' @param nObs parameter for bandwidth estimation, typical value
//' \code{nObs = nrow(X)^lambda} with \code{lambda} in the range [0.3, 0.8].
//' @param tau Initial step size
//' @param tol Tolerance for update error used for stopping criterion
//' \eqn{|| V(j) V(j)' - V(j+1) V(j+1)' ||_2 < tol}{%
//' \| V_j V_j' - V_{j+1} V_{j+1}' \|_2 < tol}.
//' @param maxIter Upper bound of optimization iterations
//'
//' @return List containing the bandwidth \code{h}, optimization objective \code{V}
//' and the matrix \code{B} estimated for the model as an orthogonal basis of the
//' orthogonal space spanned by \code{V}.
//'
//' @rdname optStiefel
//' @keywords internal
double optStiefel(
    const arma::mat& X,
    const arma::vec& Y,
    const int k,
    const double h,
    const double tauInitial,
    const double tol,
    const double slack,
    const int maxIter,
    arma::mat& V, // out
    arma::vec& history // out
) {
    using namespace arma;

    // get dimensions
    const uword n = X.n_rows; // nr samples
    const uword p = X.n_cols; // dim of random variable `X`
    const uword q = p - k;    // rank(V) e.g. dim of ortho space of span{B}

    // all `X_i - X_j` differences, `X_diff.row(i * n + j) = X_i - X_j`
    mat X_diff(n * n, p);
    for (uword i = 0, k = 0; i < n; ++i) {
        for (uword j = 0; j < n; ++j) {
            X_diff.row(k++) = X.row(i) - X.row(j);
        }
    }
    const vec Y_rep = repmat(Y, n, 1);
    const mat I_p = eye<mat>(p, p);

    // initial start value for `V`
    V = rStiefel(p, q);

    // init optimization `loss`es, `error` and stepsize `tau`
    // double loss_next = datum::inf;
    double loss;
    double error = datum::inf;
    double tau = tauInitial;
    int iter;
    // main optimization loop
    for (iter = 0; iter < maxIter && error > tol; ++iter) {
        // calc gradient `G = grad_V(L)(V)`
        mat G;
        loss = gradient(X, X_diff, Y, Y_rep, V, h, &G);
        // matrix `A` for the Cayley transform of the gradient
        mat A = tau * ((G * V.t()) - (V * G.t()));
        // next iteration step of `V`
        mat V_tau = inv(I_p + A) * (I_p - A) * V;
        // loss after step `L(V(tau))`
        double loss_tau = gradient(X, X_diff, Y, Y_rep, V_tau, h);

        // store `loss` in `history` and increase `iter`
        history(iter) = loss;

        // validate if loss decreased
        if ((loss_tau - loss) > slack * loss) {
            // ignore step, retry with half the step size
            tau = tau / 2.;
            error = datum::inf;
        } else {
            // compute step error (break condition)
            error = norm((V * V.t()) - (V_tau * V_tau.t()), 2) / (2 * q);
            // shift for next iteration
            V = V_tau;
            loss = loss_tau;
        }
    }

    // store final `loss`
    history(iter) = loss;

    return loss;
}

//' Conditional Variance Estimation (CVE) method.
//'
//' This version uses a "simple" Stiefel optimization scheme.
//'
//' @param X data points
//' @param Y response
//' @param k assumed \eqn{rank(B)}
//' @param nObs parameter for bandwidth estimation, typical value
//' \code{nObs = nrow(X)^lambda} with \code{lambda} in the range [0.3, 0.8].
//' @param tau Initial step size (default 1)
//' @param tol Tolerance for update error used for stopping criterion (default 1e-5)
//' @param slack Ratio of small negative error allowed in loss optimization (default -1e-10)
//' @param maxIter Upper bound of optimization iterations (default 50)
//' @param attempts Number of tries with new random optimization starting points (default 10)
//'
//' @return List containing the bandwidth \code{h}, optimization objective \code{V}
//' and the matrix \code{B} estimated for the model as an orthogonal basis of the
//' orthogonal space spanned by \code{V}.
//'
//' @rdname cve_cpp
//' @export
// [[Rcpp::export]]
Rcpp::List cve_cpp(
    const arma::mat& X,
    const arma::vec& Y,
    const int k,
    const double nObs,
    const double tauInitial = 1.,
    const double tol = 1e-5,
    const double slack = -1e-10,
    const int maxIter = 50,
    const int attempts = 10
) {
    using namespace arma;

    // tracker of current best results
    mat V_best;
    double loss_best = datum::inf;
    // estimate bandwidth
    double h = estimateBandwidth(X, k, nObs);

    // loss history for each optimization attempt
    // each column contains the iteration history for the loss
    mat history = mat(maxIter + 1, attempts);

    // multiple Stiefel optimization attempts
    for (int i = 0; i < attempts; ++i) {
        // declare output variables
        mat V; // estimated `V` space
        vec hist = vec(history.n_rows, fill::zeros); // optimization history
        double loss = optStiefel(X, Y, k, h, tauInitial, tol, slack, maxIter, V, hist);
        if (loss < loss_best) {
            loss_best = loss;
            V_best = V;
        }
        // write history to history collection
        history.col(i) = hist;
    }

    // get `B` as kernel of `V.t()`
    mat B = null(V_best.t());

    return Rcpp::List::create(
        Rcpp::Named("history") = history,
        Rcpp::Named("loss") = loss_best,
        Rcpp::Named("h") = h,
        Rcpp::Named("V") = V_best,
        Rcpp::Named("B") = B
    );
}

/*** R

source("CVE/R/datasets.R")
ds <- dataset()

print(system.time(
    cve.res <- cve_cpp(
        X = ds$X,
        Y = ds$Y,
        k = ncol(ds$B),
        nObs = sqrt(nrow(ds$X))
    )
))

pdf('plots/cve_V1_history.pdf')
H <- cve.res$history
H_i <- H[H[, 1] > 0, 1]
plot(1:length(H_i), H_i,
    main = "History cve_V1",
    xlab = "Iterations i",
    ylab = expression(loss == L[n](V^{(i)})),
    xlim = c(1, nrow(H)),
    ylim = c(0, max(H)),
    type = "l"
)
for (i in 2:ncol(H)) {
    H_i <- H[H[, i] > 0, i]
    lines(1:length(H_i), H_i)
}
x.ends <- apply(H, 2, function(h) { length(h[h > 0]) })
y.ends <- apply(H, 2, function(h) { min(h[h > 0]) })
points(x.ends, y.ends)

*/
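Illustrative aside (hedged, not part of the repository): the update V(tau) = (I + A)^{-1} (I - A) V used in optStiefel above is a Cayley-type step; because A is skew-symmetric, the iterate stays on the Stiefel manifold. A small R check with made-up dimensions:

p <- 6; q <- 2
V <- qr.Q(qr(matrix(rnorm(p * q), p, q)))   # random start, t(V) %*% V = I
G <- matrix(rnorm(p * q), p, q)             # stand-in for a gradient
tau <- 0.5
A <- tau * (G %*% t(V) - V %*% t(G))        # skew-symmetric: t(A) == -A
V_tau <- solve(diag(p) + A) %*% (diag(p) - A) %*% V
max(abs(crossprod(V_tau) - diag(q)))        # ~ 1e-15, still semi-orthogonal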
cve_V2.cpp (deleted, 367 lines)
@@ -1,367 +0,0 @@
//
// Development file.
//
// Usage:
// ~$ R -e "library(Rcpp); Rcpp::sourceCpp('cve_V2.cpp')"
//

// only `RcppArmadillo.h` which includes `Rcpp.h`
#include <RcppArmadillo.h>

// through the depends attribute `Rcpp` is told to create
// hooks for `RcppArmadillo` needed by the build process.
//
// [[Rcpp::depends(RcppArmadillo)]]

// required for `R::qchisq()` used in `estimateBandwidth()`
#include <Rmath.h>

//' Estimated bandwidth for CVE.
//'
//' Estimates a proper bandwidth \code{h} according to
//' \deqn{h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{%
//' h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p}
//'
//' @param X data matrix of dimension (n x p) with n data points X_i of dimension
//' p. Therefore each row represents a data point of dimension p.
//' @param k Guess for rank(B).
//' @param nObs Either numeric or a function. If specified as a numeric value
//' it is used in the computation of the bandwidth directly. If it is a function,
//' `nObs` is evaluated as \code{nObs(nrow(x))}. The default behaviour if not
//' supplied at all is to use \code{nObs <- nrow(x)^0.5}.
//'
//' @seealso [qchisq()]
//'
//' @export
// [[Rcpp::export]]
double estimateBandwidth(const arma::mat& X, arma::uword k, double nObs) {
    using namespace arma;

    uword n = X.n_rows; // nr samples
    uword p = X.n_cols; // dimension of rand. var. `X`

    // column mean
    mat M = mean(X);
    // center `X`
    mat C = X.each_row() - M;
    // trace of covariance matrix, `traceSigma = Tr(C' C)`
    double traceSigma = accu(C % C);
    // compute estimated bandwidth
    double qchi2 = R::qchisq((nObs - 1.0) / (n - 1), static_cast<double>(k), 1, 0);

    return 2.0 * qchi2 * traceSigma / (p * n);
}

//' Random element from Stiefel Manifold `S(p, q)`.
//'
//' Draws an element of \eqn{S(p, q)} which is the Stiefel Manifold.
//' This is done by taking the Q-component of the QR decomposition
//' from a `(p, q)` Matrix with independent standard normal entries.
//' As a semi-orthogonal Matrix the result `V` satisfies \eqn{V'V = I_q}.
//'
//' @param p Row dimension
//' @param q Column dimension
//'
//' @return Matrix of dim `(p, q)`.
//'
//' @seealso <https://en.wikipedia.org/wiki/Stiefel_manifold>
//'
//' @export
// [[Rcpp::export]]
arma::mat rStiefel(arma::uword p, arma::uword q) {
    arma::mat Q, R;
    arma::qr_econ(Q, R, arma::randn<arma::mat>(p, q));
    return Q;
}

double gradient(const arma::mat& X,
                const arma::mat& X_diff,
                const arma::mat& Y,
                const arma::mat& Y_rep,
                const arma::mat& V,
                const double h,
                arma::mat* G = 0
) {
    using namespace arma;

    uword n = X.n_rows;
    uword p = X.n_cols;

    // orthogonal projection matrix `Q = I - VV'` for dist computation
    mat Q = -(V * V.t());
    Q.diag() += 1;
    // calc pairwise distances as `D` with `D(i, j) = d_i(V, X_j)`
    vec D_vec = sum(square(X_diff * Q), 1);
    mat D = reshape(D_vec, n, n);
    // calc weights as `W` with `W(i, j) = w_i(V, X_j)`
    mat W = exp(D / (-2.0 * h));
    // column-wise normalization via 1-norm
    W = normalise(W, 1);
    vec W_vec = vectorise(W);
    // weighted `Y` means (first and second order)
    vec y1 = W.t() * Y;
    vec y2 = W.t() * square(Y);
    // loss for each `X_i`, meaning `L(i) = L_n(V, X_i)`
    vec L = y2 - square(y1);
    // `loss = L_n(V)`
    double loss = mean(L);
    // check if gradient as output variable is set
    if (G != 0) {
        // `G = grad(L_n(V))` a.k.a. gradient of `L` with respect to `V`
        vec scale = (repelem(L, n, 1) - square(Y_rep - repelem(y1, n, 1))) % W_vec % D_vec;
        mat X_diff_scale = X_diff.each_col() % scale;
        (*G) = X_diff_scale.t() * X_diff * V;
        (*G) *= -2.0 / (h * h * n);
    }

    return loss;
}

//' Stiefel Optimization with curvilinear linesearch.
//'
//' TODO: finish doc. comment
//' Stiefel Optimization for \code{V} given a dataset \code{X} and responses
//' \code{Y} for the model \deqn{Y\sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon}
//' to compute the matrix `B` such that \eqn{span(B) = span(V)^{\bot}}{%
//' span(B) = orth(span(V))}.
//' The curvilinear linesearch uses Armijo-Wolfe conditions:
//' \deqn{L(V(tau)) > L(V(0)) + rho_1 * tau * L(V(0))'}
//' \deqn{L(V(tau))' < rho_2 * L(V(0))'}
//'
//' @param X data points
//' @param Y response
//' @param k assumed \eqn{rank(B)}
//' @param nObs parameter for bandwidth estimation, typical value
//' \code{nObs = nrow(X)^lambda} with \code{lambda} in the range [0.3, 0.8].
//' @param tau Initial step size
//' @param tol Tolerance for update error used for stopping criterion
//' @param maxIter Upper bound of optimization iterations
//'
//' @return List containing the bandwidth \code{h}, optimization objective \code{V}
//' and the matrix \code{B} estimated for the model as an orthogonal basis of the
//' orthogonal space spanned by \code{V}.
//'
//' @rdname optStiefel
//' @keywords internal
double optStiefel(
    const arma::mat& X,
    const arma::vec& Y,
    const int k,
    const double h,
    const double tauInitial,
    const double tol,
    const int maxIter,
    const double rho1,
    const double rho2,
    const int maxLineSeachIter,
    arma::mat& V, // out
    arma::vec& history // out
) {
    using namespace arma;

    // get dimensions
    const uword n = X.n_rows; // nr samples
    const uword p = X.n_cols; // dim of random variable `X`
    const uword q = p - k;    // rank(V) e.g. dim of ortho space of span{B}

    // all `X_i - X_j` differences, `X_diff.row(i * n + j) = X_i - X_j`
    mat X_diff(n * n, p);
    for (uword i = 0, k = 0; i < n; ++i) {
        for (uword j = 0; j < n; ++j) {
            X_diff.row(k++) = X.row(i) - X.row(j);
        }
    }
    const vec Y_rep = repmat(Y, n, 1);
    const mat I_p = eye<mat>(p, p);
    const mat I_2q = eye<mat>(2 * q, 2 * q);

    // initial start value for `V`
    V = rStiefel(p, q);

    // first gradient initialization
    mat G;
    double loss = gradient(X, X_diff, Y, Y_rep, V, h, &G);

    // set first `loss` in history
    history(0) = loss;

    // main curvilinear optimization loop
    double error = datum::inf;
    int iter = 0;
    while (iter++ < maxIter && error > tol) {
        // helper matrices `lU` (linesearch U), `lV` (linesearch V)
        // as described in [Wen, Yin] Lemma 4.
        mat lU = join_rows(G, V); // linesearch "U"
        mat lV = join_rows(V, -1.0 * G); // linesearch "V"
        // `A = G V' - V G'`
        mat A = lU * lV.t();

        // set initial step size for curvilinear line search
        double tau = tauInitial, lower = 0., upper = datum::inf;

        // check if `tau` is valid for inverting

        // set line search internal gradient and loss to cycle for next iteration
        mat V_tau; // next position after a step of size `tau`, a.k.a. `V(tau)`
        mat G_tau; // gradient of `V` at `V(tau) = V_tau`
        double loss_tau; // loss (objective) at position `V(tau)`
        int lsIter = 0; // linesearch iter
        // start line search
        do {
            mat HV = inv(I_2q + (tau/2.) * lV.t() * lU) * lV.t();
            // next step `V`
            V_tau = V - tau * (lU * (HV * V));

            double LprimeV = -trace(G.t() * A * V);

            mat lB = I_p - (tau / 2.) * lU * HV;

            loss_tau = gradient(X, X_diff, Y, Y_rep, V_tau, h, &G_tau);

            double LprimeV_tau = -2. * trace(G_tau.t() * lB * A * (V + V_tau));

            // Armijo condition
            if (loss_tau > loss + (rho1 * tau * LprimeV)) {
                upper = tau;
                tau = (lower + upper) / 2.;
            // Wolfe condition
            } else if (LprimeV_tau < rho2 * LprimeV) {
                lower = tau;
                if (upper == datum::inf) {
                    tau = 2. * lower;
                } else {
                    tau = (lower + upper) / 2.;
                }
            } else {
                break;
            }
        } while (++lsIter < maxLineSeachIter);

        // compute error (break condition)
        // Note: `error` is the decrease of the objective `L_n(V)` and not the
        // norm of the gradient as proposed in [Wen, Yin] Algorithm 1.
        error = loss - loss_tau;

        // cycle `V`, `G` and `loss` for next iteration
        V = V_tau;
        loss = loss_tau;
        G = G_tau;

        // store the loss of this iteration
        history(iter) = loss;

    }

    return loss;
}


//' Conditional Variance Estimation (CVE) method.
//'
//' This version uses a curvilinear linesearch for the Stiefel optimization.
//'
//' @param X data points
//' @param Y response
//' @param k assumed \eqn{rank(B)}
//' @param nObs parameter for bandwidth estimation, typical value
//' \code{nObs = nrow(X)^lambda} with \code{lambda} in the range [0.3, 0.8].
//' @param tau Initial step size (default 1)
//' @param tol Tolerance for update error used for stopping criterion (default 1e-6)
//' @param maxIter Upper bound of optimization iterations (default 50)
//' @param attempts Number of tries with new random optimization starting points (default 10)
//'
//' @return List containing the bandwidth \code{h}, optimization objective \code{V}
//' and the matrix \code{B} estimated for the model as an orthogonal basis of the
//' orthogonal space spanned by \code{V}.
//'
//' @rdname cve_cpp_V2
//' @export
// [[Rcpp::export]]
Rcpp::List cve_cpp(
    const arma::mat& X,
    const arma::vec& Y,
    const int k,
    const double nObs,
    const double tauInitial = 1.,
    const double rho1 = 0.05,
    const double rho2 = 0.95,
    const double tol = 1e-6,
    const int maxIter = 50,
    const int maxLineSeachIter = 10,
    const int attempts = 10
) {
    using namespace arma;

    // tracker of current best results
    mat V_best;
    double loss_best = datum::inf;
    // estimate bandwidth
    double h = estimateBandwidth(X, k, nObs);

    // loss history for each optimization attempt
    // each column contains the iteration history for the loss
    mat history = mat(maxIter + 1, attempts);

    // multiple Stiefel optimization attempts
    for (int i = 0; i < attempts; ++i) {
        // declare output variables
        mat V; // estimated `V` space
        vec hist = vec(history.n_rows, fill::zeros); // optimization history
        double loss = optStiefel(X, Y, k, h,
            tauInitial, tol, maxIter, rho1, rho2, maxLineSeachIter, V, hist
        );
        if (loss < loss_best) {
            loss_best = loss;
            V_best = V;
        }
        // write history to history collection
        history.col(i) = hist;
    }

    // get `B` as kernel of `V.t()`
    mat B = null(V_best.t());

    return Rcpp::List::create(
        Rcpp::Named("history") = history,
        Rcpp::Named("loss") = loss_best,
        Rcpp::Named("h") = h,
        Rcpp::Named("V") = V_best,
        Rcpp::Named("B") = B
    );
}

/*** R

source("CVE/R/datasets.R")
ds <- dataset()

print(system.time(
    cve.res <- cve_cpp(
        X = ds$X,
        Y = ds$Y,
        k = ncol(ds$B),
        nObs = sqrt(nrow(ds$X))
    )
))

pdf('plots/cve_V2_history.pdf')
H <- cve.res$history
H_i <- H[H[, 1] > 0, 1]
plot(1:length(H_i), H_i,
    main = "History cve_V2",
    xlab = "Iterations i",
    ylab = expression(loss == L[n](V^{(i)})),
    xlim = c(1, nrow(H)),
    ylim = c(0, max(H)),
    type = "l"
)
for (i in 2:ncol(H)) {
    H_i <- H[H[, i] > 0, i]
    lines(1:length(H_i), H_i)
}
x.ends <- apply(H, 2, function(h) { length(h[h > 0]) })
y.ends <- apply(H, 2, function(h) { min(h[h > 0]) })
points(x.ends, y.ends)

*/
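Illustrative aside (hedged, not part of the repository): the line search above brackets the step size, halving on an Armijo failure and growing or bisecting on a Wolfe failure. The same bookkeeping on a one-dimensional toy objective, with rho1/rho2 taken from the cve_cpp defaults and everything else made up:

f  <- function(t) (t - 1)^2            # toy loss along the search path
df <- function(t) 2 * (t - 1)          # its derivative
rho1 <- 0.05; rho2 <- 0.95
tau <- 1e-3; lower <- 0; upper <- Inf
f0 <- f(0); d0 <- df(0)                # d0 < 0, a descent direction
for (i in 1:25) {
    if (f(tau) > f0 + rho1 * tau * d0) {        # Armijo fails: step too long
        upper <- tau; tau <- (lower + upper) / 2
    } else if (df(tau) < rho2 * d0) {           # Wolfe fails: step too short
        lower <- tau
        tau <- if (is.infinite(upper)) 2 * lower else (lower + upper) / 2
    } else break                                # both hold: accept the step
}
tau                                             # accepted step size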
@@ -1,5 +1,7 @@
 # Usage:
 # ~$ Rscript runtime_test.R
+# library(CVEpureR) # load CVE's pure R implementation
+library(CVE) # load CVE

 #' Writes log information to console. (to not get bored^^)
 tell.user <- function(name, start.time, i, length) {

@@ -24,7 +26,6 @@ dataset.names <- c("M1", "M2", "M3", "M4", "M5")
 # Set used CVE method
 methods <- c("simple") # c("legacy", "simple", "sgd", "linesearch")

-library(CVE) # load CVE
 if ("legacy" %in% methods) {
     # Source legacy code (but only if needed)
     source("CVE_legacy/function_script.R")
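Illustrative aside (hedged): with this change the runtime script loads the compiled CVE package up front, while CVEpureR remains as a commented-out pure-R alternative. A minimal sketch of the per-dataset timing it performs; dataset() and the cve.call() arguments beyond method are assumptions based on the code shown above:

library(CVE)                              # compiled implementation, as in the diff
# library(CVEpureR)                       # pure R implementation, kept as a comment
ds <- dataset()                           # synthetic dataset, assumed exported helper
print(system.time(
    dr <- cve.call(ds$X, ds$Y, method = "simple")
))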