wip: cleanup

2019-09-16 11:23:09 +02:00 · 2019-09-16 11:23:09 +02:00 · 6f46cb2eaf
commit 6f46cb2eaf
parent 03153ef05e
6 changed files with 5 additions and 874 deletions
--- a/CVE_R/R/CVE.R
+++ b/CVE_R/R/CVE.R
@ -134,8 +134,8 @@ cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5,
    }

    # augment result information
-    dr.k$method <- method
-    dr.k$call <- call
+    dr$method <- method
+    dr$call <- call
    class(dr) <- "cve"
    return(dr)
 }
--- a/CVE_R/demo/00Index
+++ b/CVE_R/demo/00Index
@ -1,2 +1,2 @@
 runtime_test    Runtime comparison of CVE against MAVE for M1 - M5 datasets.
-logging         Example of a logger function for cve algorithm analysis.
+logging         Example of a logger function for cve algorithm analysis.
--- a/cve_V0.R
+++ b/cve_V0.R
@ -1,173 +0,0 @@
-
-#' Euclidean vector norm (2-norm)
-#'
-#' @param x Numeric vector
-#' @return Numeric
-norm2 <- function(x) { return(sum(x^2)) }
-
-#' Samples uniform from the Stiefel Manifold
-#'
-#' @param p row dim.
-#' @param q col dim.
-#' @return `(p, q)` semi-orthogonal matrix
-rStiefl <- function(p, q) {
-    return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))))
-}
-
-#' Matrix Trace
-#'
-#' @param M Square matrix
-#' @return Trace \eqn{Tr(M)}
-Tr <- function(M) {
-    return(sum(diag(M)))
-}
-
-#' Null space basis of given matrix `B`
-#'
-#' @param B `(p, q)` matrix
-#' @return Semi-orthogonal `(p, p - q)` matrix `Q` spaning the null space of `B`
-null <- function(M) {
-    tmp <- qr(M)
-    set <- if(tmp$rank == 0L) seq_len(ncol(M)) else -seq_len(tmp$rank)
-    return(qr.Q(tmp, complete = TRUE)[, set, drop = FALSE])
-}
-
-####
-#chooses bandwith h according to formula in paper
-#dim...dimension of X vector
-#k... row dim of V (dim times q matrix) corresponding to a basis of orthogonal complement of B in model
-# N...sample size
-#nObs... nObs in bandwith formula
-#tr...trace of sample covariance matrix of X
-estimateBandwidth<-function(X, k, nObs) {
-    n <- nrow(X)
-    p <- ncol(X)
-
-    X_centered <- scale(X, center = TRUE, scale = FALSE)
-    Sigma <- (1 / n) * t(X_centered) %*% X_centered
-
-    quantil <- qchisq((nObs - 1) / (n - 1), k)
-    return(2 * quantil * Tr(Sigma) / p)
-}
-
-###########
-# evaluates L(V) and returns L_n(V),(L_tilde_n(V,X_i))_{i=1,..,n} and grad_V L_n(V) (p times k) 
-# V... (dim times q) matrix 
-# Xl... output of Xl_fun
-# dtemp...vector with pairwise distances |X_i - X_j|
-# q...output of q_ind function
-# Y... vector with N Y_i values
-# if grad=T, gradient of L(V) also returned
-LV <- function(V, Xl, dtemp, h, q, Y, grad = TRUE) {
-  N <- length(Y)
-  k <- if (is.vector(V)) { 1 } else { ncol(V) }
-  Xlv <- Xl %*% V
-  d <- dtemp - ((Xlv^2) %*% rep(1, k))
-  w <- dnorm(d / h) / dnorm(0)
-  w <- matrix(w, N, q)
-  w <- apply(w, 2, function(x) { x / sum(x) })
-  y1 <- t(w) %*% Y
-  y2 <- t(w) %*% (Y^2)
-  sig <- y2 - y1^2
-  
-  result <- list(var = mean(sig), sig = sig)
-  if (grad == TRUE) {
-    tmp1 <- (kronecker(sig, rep(1, N)) - (as.vector(kronecker(rep(1, q), Y)) - kronecker(y1, rep(1, N)))^2)
-    if (k == 1) {
-      grad_d <- -2 * Xl * as.vector(Xlv)
-      grad <- (1 / h^2) * (1 / q) * t(grad_d * as.vector(d) * as.vector(w)) %*% tmp1
-    } else {
-      grad <- matrix(0, nrow(V), ncol(V))
-      for (j in 1:k) {
-        grad_d <- -2 * Xl * as.vector(Xlv[ ,j])
-        grad[ ,j] <- (1 / h^2) * (1 / q) * t(grad_d * as.vector(d) * as.vector(w)) %*% tmp1
-      }
-    }
-    result$grad = grad
-  }
-
-  return(result)
-}
-#### performs stiefle optimization of argmin_{V : V'V=I_k} L_n(V)
-#through curvilinear search with k0 starting values drawn uniformly on stiefel maniquefold
-#dat...(N times dim+1) matrix with first column corresponding to Y values, the other columns 
-#consists of X data matrix, (i.e. dat=cbind(Y,X))
-#h... bandwidth
-#k...row dimension of V that is calculated, corresponds to dimension of orthogonal complement of B
-#k0... number of arbitrary starting values
-#p...fraction of data points used as shifting point
-#maxIter... number of maximal iterations in curvilinear search
-#nObs.. nObs parameter for choosing bandwidth if no h is supplied
-#lambda_0...initial stepsize
-#tol...tolerance for stoping iterations
-#sclack_para...if relative improvment is worse than sclack_para the stepsize is reduced
-#output:
-#est_base...Vhat_k= argmin_V:V'V=I_k L_n(V) a (dim times k) matrix where dim is row-dimension of X data matrix
-#var...value of L_n(Vhat_k)
-#aov_dat... (L_tilde_n(Vhat_k,X_i))_{i=1,..,N}
-#count...number of iterations 
-#h...bandwidth
-cve_R <- function(
-        X, Y, k,
-        nObs = sqrt(nrow(X)),
-        tauInitial = 1.0,
-        tol = 1e-3,
-        slack = 0,
-        maxIter = 50L,
-        attempts = 10L
-) {
-    # get dimensions
-    n <- nrow(X)
-    p <- ncol(X)
-    q <- p - k
-
-    Xl <- kronecker(rep(1, n), X) - kronecker(X, rep(1, n))
-    Xd <- apply(Xl, 1, norm2)
-    I_p <- diag(1, p)
-    # estimate bandwidth
-    h <- estimateBandwidth(X, k, nObs)
-
-    Lbest <- Inf
-    Vend <- mat.or.vec(p, q)
-
-    for (. in 1:attempts) {
-        Vnew <- Vold <- rStiefl(p, q)
-        Lnew <- Lold <- exp(10000)
-        tau <- tauInitial
-
-        error <- Inf
-        count <- 0
-        while (error > tol & count < maxIter) {
-
-            tmp <- LV(Vold, Xl, Xd, h, n, Y)
-            G <- tmp$grad
-            Lold <- tmp$var
-
-            W <- tau * (G %*% t(Vold) - Vold %*% t(G))
-            Vnew <- solve(I_p + W) %*% (I_p - W) %*% Vold
-            
-            Lnew <- LV(Vnew, Xl, Xd, h, n, Y, grad = FALSE)$var
-
-            if ((Lnew - Lold) > slack * Lold) {
-                tau = tau / 2
-                error <- Inf
-            } else {
-                error <- norm(Vold %*% t(Vold) - Vnew %*% t(Vnew), "F") / sqrt(2 * k)
-                Vold <- Vnew
-            }
-            count <- count + 1
-        }
-
-        if (Lbest > Lnew) {
-            Lbest <- Lnew
-            Vend <- Vnew
-        }
-    }
-
-    return(list(
-        loss = Lbest,
-        V = Vend,
-        B = null(Vend),
-        h = h
-    ))
-}
--- a/cve_V1.cpp
+++ b/cve_V1.cpp
@ -1,330 +0,0 @@
-//
-// Development file.
-//
-// Usage:
-//  ~$ R -e "library(Rcpp); Rcpp::sourceCpp('cve_V1.cpp')"
-//
-
-// only `RcppArmadillo.h` which includes `Rcpp.h`
-#include <RcppArmadillo.h>
-
-// through the depends attribute `Rcpp` is tolled to create
-// hooks for `RcppArmadillo` needed by the build process.
-//
-// [[Rcpp::depends(RcppArmadillo)]]
-
-// required for `R::qchisq()` used in `estimateBandwidth()`
-#include <Rmath.h>
-
-//' Estimated bandwidth for CVE.
-//'
-//' Estimates a propper bandwidth \code{h} according
-//' \deqn{h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{%
-//'       h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p}
-//'
-//' @param X data matrix of dimension (n x p) with n data points X_i of dimension
-//'  q. Therefor each row represents a datapoint of dimension p.
-//' @param k Guess for rank(B).
-//' @param nObs Ether numeric of a function. If specified as numeric value
-//'  its used in the computation of the bandwidth directly. If its a function
-//'  `nObs` is evaluated as \code{nObs(nrow(x))}. The default behaviou if not
-//'  supplied at all is to use \code{nObs <- nrow(x)^0.5}.
-//'
-//' @seealso [qchisq()]
-//'
-//' @export
-// [[Rcpp::export]]
-double estimateBandwidth(const arma::mat& X, arma::uword k, double nObs) {
-    using namespace arma;
-
-    uword n = X.n_rows; // nr samples
-    uword p = X.n_cols; // dimension of rand. var. `X`
-
-    // column mean
-    mat M = mean(X);
-    // center `X`
-    mat C = X.each_row() - M;
-    // trace of covariance matrix, `traceSigma = Tr(C' C)`
-    double traceSigma = accu(C % C);
-    // compute estimated bandwidth
-    double qchi2 = R::qchisq((nObs - 1.0) / (n - 1), static_cast<double>(k), 1, 0);
-
-    return 2.0 * qchi2 * traceSigma / (p * n);
-}
-
-//' Random element from Stiefel Manifold `S(p, q)`.
-//'
-//' Draws an element of \eqn{S(p, q)} which is the Stiefel Manifold.
-//' This is done by taking the Q-component of the QR decomposition
-//' from a `(p, q)` Matrix with independent standart normal entries.
-//' As a semi-orthogonal Matrix the result `V` satisfies \eqn{V'V = I_q}.
-//'
-//' @param p Row dimension
-//' @param q Column dimension
-//'
-//' @return Matrix of dim `(p, q)`.
-//'
-//' @seealso <https://en.wikipedia.org/wiki/Stiefel_manifold>
-//'
-//' @export
-// [[Rcpp::export]]
-arma::mat rStiefel(arma::uword p, arma::uword q) {
-    arma::mat Q, R;
-    arma::qr_econ(Q, R, arma::randn<arma::mat>(p, q));
-    return Q;
-}
-
-//' Gradient computation of the loss `L_n(V)`.
-//'
-//' The loss is defined as
-//' \deqn{L_n(V) := \frac{1}{n}\sum_{j=1}^n (y_2(V, X_j) - y_1(V, X_j)^2)}
-//' with
-//' \deqn{y_l(s_0) := \sum_{i=1}^n w_i(V, s_0)Y_i^l}{y_l(s_0) := sum_i(w_i(V, s_0) Y_i^l)}
-//'
-//' @rdname optStiefel
-//' @keywords internal
-double gradient(const arma::mat& X,
-                const arma::mat& X_diff,
-                const arma::mat& Y,
-                const arma::mat& Y_rep,
-                const arma::mat& V,
-                const double h,
-                arma::mat* G = 0
-) {
-    using namespace arma;
-
-    uword n = X.n_rows;
-    uword p = X.n_cols;
-
-    // orthogonal projection matrix `Q = I - VV'` for dist computation
-    mat Q = -(V * V.t());
-    Q.diag() += 1;
-    // calc pairwise distances as `D` with `D(i, j) = d_i(V, X_j)`
-    vec D_vec = sum(square(X_diff * Q), 1);
-    mat D = reshape(D_vec, n, n);
-    // calc weights as `W` with `W(i, j) = w_i(V, X_j)`
-    mat W = exp(D / (-2.0 * h));
-    // column-wise normalization via 1-norm
-    W = normalise(W, 1);
-    vec W_vec = vectorise(W);
-    // weighted `Y` means (first and second order)
-    vec y1 = W.t() * Y;
-    vec y2 = W.t() * square(Y);
-    // loss for each `X_i`, meaning `L(i) = L_n(V, X_i)`
-    vec L = y2 - square(y1);
-    // `loss = L_n(V)`
-    double loss = mean(L);
-    // check if gradient as output variable is set
-    if (G != 0) {
-        // `G = grad(L_n(V))` a.k.a. gradient of `L` with respect to `V`
-        vec scale = (repelem(L, n, 1) - square(Y_rep - repelem(y1, n, 1))) % W_vec % D_vec;
-        mat X_diff_scale = X_diff.each_col() % scale;
-        (*G) = X_diff_scale.t() * X_diff * V;
-        (*G) *= -2.0 / (h * h * n);
-    }
-
-    return loss;
-}
-
-//' Stiefel Optimization.
-//'
-//' Stiefel Optimization for \code{V} given a dataset \code{X} and responces
-//' \code{Y} for the model \deqn{Y\sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon}
-//' to compute the matrix `B` such that \eqn{span{B} = span(V)^{\bot}}{%
-//' span(B) = orth(span(B))}.
-//'
-//' @param X data points
-//' @param Y response
-//' @param k assumed \eqn{rank(B)}
-//' @param nObs parameter for bandwidth estimation, typical value
-//'     \code{nObs = nrow(X)^lambda} with \code{lambda} in the range [0.3, 0.8].
-//' @param tau Initial step size
-//' @param tol Tolerance for update error used for stopping criterion
-//'     \eqn{|| V(j) V(j)' - V(j+1) V(j+1)' ||_2 < tol}{%
-//'          \| V_j V_j' - V_{j+1} V_{j+1}' \|_2 < tol}.
-//' @param maxIter Upper bound of optimization iterations
-//'
-//' @return List containing the bandwidth \code{h}, optimization objective \code{V}
-//'     and the matrix \code{B} estimated for the model as a orthogonal basis of the
-//'     orthogonal space spaned by \code{V}.
-//'
-//' @rdname optStiefel
-//' @keywords internal
-double optStiefel(
-        const arma::mat& X,
-        const arma::vec& Y,
-        const int k,
-        const double h,
-        const double tauInitial,
-        const double tol,
-        const double slack,
-        const int maxIter,
-        arma::mat& V, // out
-        arma::vec& history // out
-) {
-    using namespace arma;
-
-    // get dimensions
-    const uword n = X.n_rows; // nr samples
-    const uword p = X.n_cols; // dim of random variable `X`
-    const uword q = p - k;    // rank(V) e.g. dim of ortho space of span{B}
-
-    // all `X_i - X_j` differences, `X_diff.row(i * n + j) = X_i - X_j`
-    mat X_diff(n * n, p);
-    for (uword i = 0, k = 0; i < n; ++i) {
-        for (uword j = 0; j < n; ++j) {
-            X_diff.row(k++) = X.row(i) - X.row(j);
-        }
-    }
-    const vec Y_rep = repmat(Y, n, 1);
-    const mat I_p = eye<mat>(p, p);
-
-    // initial start value for `V`
-    V = rStiefel(p, q);
-
-    // init optimization `loss`es, `error` and stepsize `tau`
-    // double loss_next = datum::inf;
-    double loss;
-    double error = datum::inf;
-    double tau = tauInitial;
-    int iter;
-    // main optimization loop
-    for (iter = 0; iter < maxIter && error > tol; ++iter) {
-        // calc gradient `G = grad_V(L)(V)`
-        mat G;
-        loss = gradient(X, X_diff, Y, Y_rep, V, h, &G);
-        // matrix `A` for colescy-transform of the gradient
-        mat A = tau * ((G * V.t()) - (V * G.t()));
-        // next iteration step of `V`
-        mat V_tau = inv(I_p + A) * (I_p - A) * V;
-        // loss after step `L(V(tau))`
-        double loss_tau = gradient(X, X_diff, Y, Y_rep, V_tau, h);
-
-        // store `loss` in `history` and increase `iter`
-        history(iter) = loss;
-
-        // validate if loss decreased
-        if ((loss_tau - loss) > slack * loss) {
-            // ignore step, retry with half the step size
-            tau = tau / 2.;
-            error = datum::inf;
-        } else {
-            // compute step error (break condition)
-            error = norm((V * V.t()) - (V_tau * V_tau.t()), 2) / (2 * q);
-            // shift for next iteration
-            V = V_tau;
-            loss = loss_tau;
-        }
-    }
-
-    // store final `loss`
-    history(iter) = loss;
-
-    return loss;
-}
-
-//' Conditional Variance Estimation (CVE) method.
-//'
-//' This version uses a "simple" stiefel optimization schema.
-//'
-//' @param X data points
-//' @param Y response
-//' @param k assumed \eqn{rank(B)}
-//' @param nObs parameter for bandwidth estimation, typical value
-//'     \code{nObs = nrow(X)^lambda} with \code{lambda} in the range [0.3, 0.8].
-//' @param tau Initial step size (default 1)
-//' @param tol Tolerance for update error used for stopping criterion (default 1e-5)
-//' @param slack Ratio of small negative error allowed in loss optimization (default -1e-10)
-//' @param maxIter Upper bound of optimization iterations (default 50)
-//' @param attempts Number of tryes with new random optimization starting points (default 10)
-//'
-//' @return List containing the bandwidth \code{h}, optimization objective \code{V}
-//'     and the matrix \code{B} estimated for the model as a orthogonal basis of the
-//'     orthogonal space spaned by \code{V}.
-//'
-//' @rdname cve_cpp
-//' @export
-// [[Rcpp::export]]
-Rcpp::List cve_cpp(
-        const arma::mat& X,
-        const arma::vec& Y,
-        const int k,
-        const double nObs,
-        const double tauInitial = 1.,
-        const double tol = 1e-5,
-        const double slack = -1e-10,
-        const int maxIter = 50,
-        const int attempts = 10
-) {
-    using namespace arma;
-
-    // tracker of current best results
-    mat V_best;
-    double loss_best = datum::inf;
-    // estimate bandwidth
-    double h = estimateBandwidth(X, k, nObs);
-
-    // loss history for each optimization attempt
-    // each column contaions the iteration history for the loss
-    mat history = mat(maxIter + 1, attempts);
-
-    // multiple stiefel optimization attempts
-    for (int i = 0; i < attempts; ++i) {
-        // declare output variables
-        mat V; // estimated `V` space
-        vec hist = vec(history.n_rows, fill::zeros); // optimization history
-        double loss = optStiefel(X, Y, k, h, tauInitial, tol, slack, maxIter, V, hist);
-        if (loss < loss_best) {
-            loss_best = loss;
-            V_best = V;
-        }
-        // write history to history collection
-        history.col(i) = hist;
-    }
-
-    // get `B` as kernal of `V.t()`
-    mat B = null(V_best.t());
-
-    return Rcpp::List::create(
-        Rcpp::Named("history") = history,
-        Rcpp::Named("loss") = loss_best,
-        Rcpp::Named("h") = h,
-        Rcpp::Named("V") = V_best,
-        Rcpp::Named("B") = B
-    );
-}
-
-/*** R
-
-source("CVE/R/datasets.R")
-ds <- dataset()
-
-print(system.time(
-    cve.res <- cve_cpp(
-        X = ds$X,
-        Y = ds$Y,
-        k = ncol(ds$B),
-        nObs = sqrt(nrow(ds$X))
-    )
-))
-
-pdf('plots/cve_V1_history.pdf')
-H <- cve.res$history
-H_i <- H[H[, 1] > 0, 1]
-plot(1:length(H_i), H_i,
-    main = "History cve_V1",
-    xlab = "Iterations i",
-    ylab = expression(loss == L[n](V^{(i)})),
-    xlim = c(1, nrow(H)),
-    ylim = c(0, max(H)),
-    type = "l"
-)
-for (i in 2:ncol(H)) {
-    H_i <- H[H[, i] > 0, i]
-    lines(1:length(H_i), H_i)
-}
-x.ends <- apply(H, 2, function(h) { length(h[h > 0]) })
-y.ends <- apply(H, 2, function(h) { min(h[h > 0]) })
-points(x.ends, y.ends)
-
-*/
--- a/cve_V2.cpp
+++ b/cve_V2.cpp
@ -1,367 +0,0 @@
-//
-// Development file.
-//
-// Usage:
-//  ~$ R -e "library(Rcpp); Rcpp::sourceCpp('cve_V2.cpp')"
-//
-
-// only `RcppArmadillo.h` which includes `Rcpp.h`
-#include <RcppArmadillo.h>
-
-// through the depends attribute `Rcpp` is tolled to create
-// hooks for `RcppArmadillo` needed by the build process.
-//
-// [[Rcpp::depends(RcppArmadillo)]]
-
-// required for `R::qchisq()` used in `estimateBandwidth()`
-#include <Rmath.h>
-
-//' Estimated bandwidth for CVE.
-//'
-//' Estimates a propper bandwidth \code{h} according
-//' \deqn{h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{%
-//'       h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p}
-//'
-//' @param X data matrix of dimension (n x p) with n data points X_i of dimension
-//'  q. Therefor each row represents a datapoint of dimension p.
-//' @param k Guess for rank(B).
-//' @param nObs Ether numeric of a function. If specified as numeric value
-//'  its used in the computation of the bandwidth directly. If its a function
-//'  `nObs` is evaluated as \code{nObs(nrow(x))}. The default behaviou if not
-//'  supplied at all is to use \code{nObs <- nrow(x)^0.5}.
-//'
-//' @seealso [qchisq()]
-//'
-//' @export
-// [[Rcpp::export]]
-double estimateBandwidth(const arma::mat& X, arma::uword k, double nObs) {
-    using namespace arma;
-
-    uword n = X.n_rows; // nr samples
-    uword p = X.n_cols; // dimension of rand. var. `X`
-
-    // column mean
-    mat M = mean(X);
-    // center `X`
-    mat C = X.each_row() - M;
-    // trace of covariance matrix, `traceSigma = Tr(C' C)`
-    double traceSigma = accu(C % C);
-    // compute estimated bandwidth
-    double qchi2 = R::qchisq((nObs - 1.0) / (n - 1), static_cast<double>(k), 1, 0);
-
-    return 2.0 * qchi2 * traceSigma / (p * n);
-}
-
-//' Random element from Stiefel Manifold `S(p, q)`.
-//'
-//' Draws an element of \eqn{S(p, q)} which is the Stiefel Manifold.
-//' This is done by taking the Q-component of the QR decomposition
-//' from a `(p, q)` Matrix with independent standart normal entries.
-//' As a semi-orthogonal Matrix the result `V` satisfies \eqn{V'V = I_q}.
-//'
-//' @param p Row dimension
-//' @param q Column dimension
-//'
-//' @return Matrix of dim `(p, q)`.
-//'
-//' @seealso <https://en.wikipedia.org/wiki/Stiefel_manifold>
-//'
-//' @export
-// [[Rcpp::export]]
-arma::mat rStiefel(arma::uword p, arma::uword q) {
-    arma::mat Q, R;
-    arma::qr_econ(Q, R, arma::randn<arma::mat>(p, q));
-    return Q;
-}
-
-double gradient(const arma::mat& X,
-                const arma::mat& X_diff,
-                const arma::mat& Y,
-                const arma::mat& Y_rep,
-                const arma::mat& V,
-                const double h,
-                arma::mat* G = 0
-) {
-    using namespace arma;
-
-    uword n = X.n_rows;
-    uword p = X.n_cols;
-
-    // orthogonal projection matrix `Q = I - VV'` for dist computation
-    mat Q = -(V * V.t());
-    Q.diag() += 1;
-    // calc pairwise distances as `D` with `D(i, j) = d_i(V, X_j)`
-    vec D_vec = sum(square(X_diff * Q), 1);
-    mat D = reshape(D_vec, n, n);
-    // calc weights as `W` with `W(i, j) = w_i(V, X_j)`
-    mat W = exp(D / (-2.0 * h));
-    // column-wise normalization via 1-norm
-    W = normalise(W, 1);
-    vec W_vec = vectorise(W);
-    // weighted `Y` means (first and second order)
-    vec y1 = W.t() * Y;
-    vec y2 = W.t() * square(Y);
-    // loss for each `X_i`, meaning `L(i) = L_n(V, X_i)`
-    vec L = y2 - square(y1);
-    // `loss = L_n(V)`
-    double loss = mean(L);
-    // check if gradient as output variable is set
-    if (G != 0) {
-        // `G = grad(L_n(V))` a.k.a. gradient of `L` with respect to `V`
-        vec scale = (repelem(L, n, 1) - square(Y_rep - repelem(y1, n, 1))) % W_vec % D_vec;
-        mat X_diff_scale = X_diff.each_col() % scale;
-        (*G) = X_diff_scale.t() * X_diff * V;
-        (*G) *= -2.0 / (h * h * n);
-    }
-
-    return loss;
-}
-
-//' Stiefel Optimization with curvilinear linesearch.
-//'
-//' TODO: finish doc. comment
-//' Stiefel Optimization for \code{V} given a dataset \code{X} and responces
-//' \code{Y} for the model \deqn{Y\sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon}
-//' to compute the matrix `B` such that \eqn{span{B} = span(V)^{\bot}}{%
-//' span(B) = orth(span(B))}.
-//' The curvilinear linesearch uses Armijo-Wolfe conditions:
-//      \deqn{L(V(tau)) > L(V(0)) + rho_1 * tau * L(V(0))'}
-//'     \deqn{L(V(tau))' < rho_2 * L(V(0))'}
-//'
-//' @param X data points
-//' @param Y response
-//' @param k assumed \eqn{rank(B)}
-//' @param nObs parameter for bandwidth estimation, typical value
-//'     \code{nObs = nrow(X)^lambda} with \code{lambda} in the range [0.3, 0.8].
-//' @param tau Initial step size
-//' @param tol Tolerance for update error used for stopping criterion
-//' @param maxIter Upper bound of optimization iterations
-//'
-//' @return List containing the bandwidth \code{h}, optimization objective \code{V}
-//'     and the matrix \code{B} estimated for the model as a orthogonal basis of the
-//'     orthogonal space spaned by \code{V}.
-//'
-//' @rdname optStiefel
-//' @keywords internal
-double optStiefel(
-        const arma::mat& X,
-        const arma::vec& Y,
-        const int k,
-        const double h,
-        const double tauInitial,
-        const double tol,
-        const int maxIter,
-        const double rho1,
-        const double rho2,
-        const int maxLineSeachIter,
-        arma::mat& V,      // out
-        arma::vec& history // out
-) {
-    using namespace arma;
-
-    // get dimensions
-    const uword n = X.n_rows; // nr samples
-    const uword p = X.n_cols; // dim of random variable `X`
-    const uword q = p - k;    // rank(V) e.g. dim of ortho space of span{B}
-
-    // all `X_i - X_j` differences, `X_diff.row(i * n + j) = X_i - X_j`
-    mat X_diff(n * n, p);
-    for (uword i = 0, k = 0; i < n; ++i) {
-        for (uword j = 0; j < n; ++j) {
-            X_diff.row(k++) = X.row(i) - X.row(j);
-        }
-    }
-    const vec Y_rep = repmat(Y, n, 1);
-    const mat I_p = eye<mat>(p, p);
-    const mat I_2q = eye<mat>(2 * q, 2 * q);
-
-    // initial start value for `V`
-    V = rStiefel(p, q);
-
-    // first gradient initialization
-    mat G;
-    double loss = gradient(X, X_diff, Y, Y_rep, V, h, &G);
-
-    // set first `loss` in history
-    history(0) = loss;
-
-    // main curvilinear optimization loop
-    double error = datum::inf;
-    int iter = 0;
-    while (iter++ < maxIter && error > tol) {
-        // helper matrices `lU` (linesearch U), `lV` (linesearch V)
-        // as describet in [Wen, Yin] Lemma 4.
-        mat lU = join_rows(G, V); // linesearch "U"
-        mat lV = join_rows(V, -1.0 * G); // linesearch "V"
-        // `A = G V' - V G'`
-        mat A = lU * lV.t();
-
-        // set initial step size for curvilinear line search
-        double tau = tauInitial, lower = 0., upper = datum::inf;
-
-        // check if `tau` is valid for inverting 
-
-        // set line search internal gradient and loss to cycle for next iteration
-        mat V_tau; // next position after a step of size `tau`, a.k.a. `V(tau)`
-        mat G_tau; // gradient of `V` at `V(tau) = V_tau`
-        double loss_tau; // loss (objective) at position `V(tau)`
-        int lsIter = 0; // linesearch iter
-        // start line search
-        do {
-            mat HV = inv(I_2q + (tau/2.) * lV.t() * lU) * lV.t();
-            // next step `V`
-            V_tau = V - tau * (lU * (HV * V));
-
-            double LprimeV = -trace(G.t() * A * V);
-
-            mat lB = I_p - (tau / 2.) * lU * HV;
-
-            loss_tau = gradient(X, X_diff, Y, Y_rep, V_tau, h, &G_tau);
-
-            double LprimeV_tau = -2. * trace(G_tau.t() * lB * A * (V + V_tau));
-
-            // Armijo condition
-            if (loss_tau > loss + (rho1 * tau * LprimeV)) {
-                upper = tau;
-                tau = (lower + upper) / 2.;
-            // Wolfe condition
-            } else if (LprimeV_tau < rho2 * LprimeV) {
-                lower = tau;
-                if (upper == datum::inf) {
-                    tau = 2. * lower;
-                } else {
-                    tau = (lower + upper) / 2.;
-                }
-            } else {
-                break;
-            }
-        } while (++lsIter < maxLineSeachIter);
-
-        // compute error (break condition)
-        // Note: `error` is the decrease of the objective `L_n(V)` and not the
-        //       norm of the gradient as proposed in [Wen, Yin] Algorithm 1.
-        error = loss - loss_tau;
-
-        // cycle `V`, `G` and `loss` for next iteration
-        V = V_tau;
-        loss = loss_tau;
-        G = G_tau;
-
-        // store final `loss`
-        history(iter) = loss;
-
-    }
-
-    return loss;
-}
-
-
-//' Conditional Variance Estimation (CVE) method.
-//'
-//' This version uses a curvilinear linesearch for the stiefel optimization.
-//'
-//' @param X data points
-//' @param Y response
-//' @param k assumed \eqn{rank(B)}
-//' @param nObs parameter for bandwidth estimation, typical value
-//'     \code{nObs = nrow(X)^lambda} with \code{lambda} in the range [0.3, 0.8].
-//' @param tau Initial step size (default 1)
-//' @param tol Tolerance for update error used for stopping criterion (default 1e-5)
-//' @param slack Ratio of small negative error allowed in loss optimization (default -1e-10)
-//' @param maxIter Upper bound of optimization iterations (default 50)
-//' @param attempts Number of tryes with new random optimization starting points (default 10)
-//'
-//' @return List containing the bandwidth \code{h}, optimization objective \code{V}
-//'     and the matrix \code{B} estimated for the model as a orthogonal basis of the
-//'     orthogonal space spaned by \code{V}.
-//'
-//' @rdname cve_cpp_V2
-//' @export
-// [[Rcpp::export]]
-Rcpp::List cve_cpp(
-        const arma::mat& X,
-        const arma::vec& Y,
-        const int k,
-        const double nObs,
-        const double tauInitial = 1.,
-        const double rho1 = 0.05,
-        const double rho2 = 0.95,
-        const double tol = 1e-6,
-        const int maxIter = 50,
-        const int maxLineSeachIter = 10,
-        const int attempts = 10
-) {
-    using namespace arma;
-
-    // tracker of current best results
-    mat V_best;
-    double loss_best = datum::inf;
-    // estimate bandwidth
-    double h = estimateBandwidth(X, k, nObs);
-
-    // loss history for each optimization attempt
-    // each column contaions the iteration history for the loss
-    mat history = mat(maxIter + 1, attempts);
-
-    // multiple stiefel optimization attempts
-    for (int i = 0; i < attempts; ++i) {
-        // declare output variables
-        mat V; // estimated `V` space
-        vec hist = vec(history.n_rows, fill::zeros); // optimization history
-        double loss = optStiefel(X, Y, k, h,
-            tauInitial, tol, maxIter, rho1, rho2, maxLineSeachIter, V, hist
-        );
-        if (loss < loss_best) {
-            loss_best = loss;
-            V_best = V;
-        }
-        // write history to history collection
-        history.col(i) = hist;
-    }
-
-    // get `B` as kernal of `V.t()`
-    mat B = null(V_best.t());
-
-    return Rcpp::List::create(
-        Rcpp::Named("history") = history,
-        Rcpp::Named("loss") = loss_best,
-        Rcpp::Named("h") = h,
-        Rcpp::Named("V") = V_best,
-        Rcpp::Named("B") = B
-    );
-}
-
-/*** R
-
-source("CVE/R/datasets.R")
-ds <- dataset()
-
-print(system.time(
-    cve.res <- cve_cpp(
-        X = ds$X,
-        Y = ds$Y,
-        k = ncol(ds$B),
-        nObs = sqrt(nrow(ds$X))
-    )
-))
-
-pdf('plots/cve_V2_history.pdf')
-H <- cve.res$history
-H_i <- H[H[, 1] > 0, 1]
-plot(1:length(H_i), H_i,
-    main = "History cve_V2",
-    xlab = "Iterations i",
-    ylab = expression(loss == L[n](V^{(i)})),
-    xlim = c(1, nrow(H)),
-    ylim = c(0, max(H)),
-    type = "l"
-)
-for (i in 2:ncol(H)) {
-    H_i <- H[H[, i] > 0, i]
-    lines(1:length(H_i), H_i)
-}
-x.ends <- apply(H, 2, function(h) { length(h[h > 0]) })
-y.ends <- apply(H, 2, function(h) { min(h[h > 0]) })
-points(x.ends, y.ends)
-
-*/
--- a/runtime_test.R
+++ b/runtime_test.R
@ -1,5 +1,7 @@
 # Usage:
 # ~$ Rscript runtime_test.R
+# library(CVEpureR) # load CVE's pure R implementation
+library(CVE) # load CVE

 #' Writes log information to console. (to not get bored^^)
 tell.user <- function(name, start.time, i, length) {
@ -24,7 +26,6 @@ dataset.names <- c("M1", "M2", "M3", "M4", "M5")
 # Set used CVE method
 methods <- c("simple") # c("legacy", "simple", "sgd", "linesearch")

-library(CVE) # load CVE
 if ("legacy" %in% methods) {
    # Source legacy code (but only if needed)
    source("CVE_legacy/function_script.R")