cleanup

parent 8db5266f8e
commit 50302ce18a
@@ -5,15 +5,15 @@ S3method(summary,cve)
 export(cve)
 export(cve.call)
 export(cve.grid.search)
-export(cve_linesearch)
-export(cve_sgd)
-export(cve_simple)
 export(dataset)
 export(elem.pairs)
 export(estimate.bandwidth)
-export(grad)
 export(null)
+export(projTangentStiefl)
 export(rStiefl)
+export(retractStiefl)
+export(skew)
+export(sym)
 import(stats)
 importFrom(graphics,lines)
 importFrom(graphics,plot)
@@ -1,169 +0,0 @@
-#' Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe
-#' conditions.
-#'
-#' @keywords internal
-#' @export
-cve_linesearch <- function(X, Y, k,
-                           nObs = sqrt(nrow(X)),
-                           h = NULL,
-                           tau = 1.0,
-                           tol = 1e-3,
-                           rho1 = 0.1,
-                           rho2 = 0.9,
-                           slack = 0,
-                           epochs = 50L,
-                           attempts = 10L,
-                           max.linesearch.iter = 10L,
-                           logger = NULL
-) {
-    # Set `grad` functions environment to enable if to find this environments
-    # local variabels, needed to enable the manipulation of this local variables
-    # from within `grad`.
-    environment(grad) <- environment()
-
-    # Get dimensions.
-    n <- nrow(X)
-    p <- ncol(X)
-    q <- p - k
-
-    # Save initial learning rate `tau`.
-    tau.init <- tau
-    # Addapt tolearance for break condition.
-    tol <- sqrt(2 * q) * tol
-
-    # Estaimate bandwidth if not given.
-    if (missing(h) | !is.numeric(h)) {
-        h <- estimate.bandwidth(X, k, nObs)
-    }
-
-    # Compute persistent data.
-    # Compute lookup indexes for symmetrie, lower/upper
-    # triangular parts and vectorization.
-    pair.index <- elem.pairs(seq(n))
-    i <- pair.index[1, ]  # `i` indices of `(i, j)` pairs
-    j <- pair.index[2, ]  # `j` indices of `(i, j)` pairs
-    # Matrix of vectorized indices. (vec(index) -> seq)
-    index <- matrix(seq(n * n), n, n)
-    lower <- index[lower.tri(index)]
-    upper <- t(index)[lower]
-
-    # Create all pairewise differences of rows of `X`.
-    X_diff <- X[i, , drop = F] - X[j, , drop = F]
-    # Identity matrix.
-    I_p <- diag(1, p)
-
-    # Init tracking of current best (according multiple attempts).
-    V.best <- NULL
-    loss.best <- Inf
-
-    # Start loop for multiple attempts.
-    for (attempt in 1:attempts) {
-
-        # Sample a `(p, q)` dimensional matrix from the stiefel manifold as
-        # optimization start value.
-        V <- rStiefl(p, q)
-
-        # Initial loss and gradient.
-        loss <- Inf
-        G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
-        # Set last loss (aka, loss after applying the step).
-        loss.last <- loss
-
-        # Call logger with initial values before starting optimization.
-        if (is.function(logger)) {
-            epoch <- 0 # Set epoch count to 0 (only relevant for logging).
-            error <- NA
-            logger(environment())
-        }
-
-        ## Start optimization loop.
-        for (epoch in 1:epochs) {
-
-            # Cayley transform matrix `A`
-            A <- (G %*% t(V)) - (V %*% t(G))
-
-            # Directional derivative of the loss at current position, given
-            # as `Tr(G^T \cdot A \cdot V)`.
-            loss.prime <- -0.5 * norm(A, type = 'F')^2
-
-            # Linesearch
-            tau.upper <- Inf
-            tau.lower <- 0
-            tau <- tau.init
-            for (iter in 1:max.linesearch.iter) {
-                # Apply learning rate `tau`.
-                A.tau <- (tau / 2) * A
-                # Parallet transport (on Stiefl manifold) into direction of `G`.
-                inv <- solve(I_p + A.tau)
-                V.tau <- inv %*% ((I_p - A.tau) %*% V)
-
-                # Loss at position after a step.
-                loss <- Inf # aka loss.tau
-                G.tau <- grad(X, Y, V.tau, h, loss.out = TRUE, persistent = TRUE)
-
-                # Armijo condition.
-                if (loss > loss.last + (rho1 * tau * loss.prime)) {
-                    tau.upper <- tau
-                    tau <- (tau.lower + tau.upper) / 2
-                    next()
-                }
-
-                V.prime.tau <- -0.5 * inv %*% A %*% (V + V.tau)
-                loss.prime.tau <- sum(G * V.prime.tau) # Tr(grad(tau)^T \cdot Y^'(tau))
-
-                # Wolfe condition.
-                if (loss.prime.tau < rho2 * loss.prime) {
-                    tau.lower <- tau
-                    if (tau.upper == Inf) {
-                        tau <- 2 * tau.lower
-                    } else {
-                        tau <- (tau.lower + tau.upper) / 2
-                    }
-                } else {
-                    break()
-                }
-            }
-
-            # Compute error.
-            error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
-
-            # Check break condition (epoch check to skip ignored gradient calc).
-            # Note: the devision by `sqrt(2 * k)` is included in `tol`.
-            if (error < tol | epoch >= epochs) {
-                # take last step and stop optimization.
-                V <- V.tau
-                # Final call to the logger before stopping optimization
-                if (is.function(logger)) {
-                    G <- G.tau
-                    logger(environment())
-                }
-                break()
-            }
-
-            # Perform the step and remember previous loss.
-            V <- V.tau
-            loss.last <- loss
-            G <- G.tau
-
-            # Log after taking current step.
-            if (is.function(logger)) {
-                logger(environment())
-            }
-
-        }
-
-        # Check if current attempt improved previous ones
-        if (loss < loss.best) {
-            loss.best <- loss
-            V.best <- V
-        }
-
-    }
-
-    return(list(
-        loss = loss.best,
-        V = V.best,
-        B = null(V.best),
-        h = h
-    ))
-}
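The removed `cve_linesearch` performs a curvilinear search along the Cayley retraction of the Stiefel manifold; in the notation of the code above, the step and the two linesearch conditions it bisects on are:

```latex
% Cayley (curvilinear) step, with A skew-symmetric:
A = G V^\top - V G^\top, \qquad
V(\tau) = \Bigl(I_p + \tfrac{\tau}{2} A\Bigr)^{-1}
          \Bigl(I_p - \tfrac{\tau}{2} A\Bigr) V, \qquad
L'(0) = -\tfrac{1}{2}\,\lVert A \rVert_F^2 .
% Armijo condition (shrink \tau while violated):
L(V(\tau)) \;\le\; L(V) + \rho_1 \, \tau \, L'(0),
% Wolfe condition (enlarge \tau while violated):
L'(\tau) \;\ge\; \rho_2 \, L'(0).
```

The bisection on `tau.lower`/`tau.upper` in the loop above enforces both conditions, with `rho1`/`rho2` playing the roles of \(\rho_1\)/\(\rho_2\).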
@@ -1,48 +0,0 @@
-#' Compute get gradient of `L(V)` given a dataset `X`.
-#'
-#' @param X Data matrix.
-#' @param Y Responce.
-#' @param V Position to compute the gradient at, aka point on Stiefl manifold.
-#' @param h Bandwidth
-#' @param loss.out Iff \code{TRUE} loss will be written to parent environment.
-#' @param loss.only Boolean to only compute the loss, of \code{TRUE} a single
-#' value loss is returned and \code{envir} is ignored.
-#' @param persistent Determines if data indices and dependent calculations shall
-#' be reused from the parent environment. ATTENTION: Do NOT set this flag, only
-#' intended for internal usage by carefully aligned functions!
-#' @keywords internal
-#' @export
-grad <- function(X, Y, V, h,
-                 loss.out = FALSE,
-                 loss.only = FALSE,
-                 persistent = FALSE) {
-    # Get number of samples and dimension.
-    n <- nrow(X)
-    p <- ncol(X)
-
-    if (!persistent) {
-        # Compute lookup indexes for symmetrie, lower/upper
-        # triangular parts and vectorization.
-        pair.index <- elem.pairs(seq(n))
-        i <- pair.index[1, ]  # `i` indices of `(i, j)` pairs
-        j <- pair.index[2, ]  # `j` indices of `(i, j)` pairs
-        # Index of vectorized matrix, for lower and upper triangular part.
-        lower <- ((i - 1) * n) + j
-        upper <- ((j - 1) * n) + i
-
-        # Create all pairewise differences of rows of `X`.
-        X_diff <- X[i, , drop = F] - X[j, , drop = F]
-    }
-
-    out <- .Call("grad_c", PACKAGE = "CVE",
-                 X, X_diff, as.double(Y), V, as.double(h));
-
-    if (loss.only) {
-        return(out$loss)
-    }
-    if (loss.out) {
-        loss <<- out$loss
-    }
-
-    return(out$G)
-}
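The index bookkeeping used by the removed `grad` (and by the optimizers elsewhere in this commit) maps each pair `(i, j)` to the lower- and upper-triangular positions of a vectorized `n x n` matrix. A small self-contained check, assuming `elem.pairs(seq(n))` behaves like `combn(n, 2)` (a 2-row matrix of pairs with `i < j`):

```r
# Sketch of the pairwise-index layout; `combn` stands in for `elem.pairs`.
n <- 4
pair.index <- combn(seq_len(n), 2)   # assumed equivalent to elem.pairs(seq(n))
i <- pair.index[1, ]                 # first index of each (i, j) pair
j <- pair.index[2, ]                 # second index of each (i, j) pair

# Column-major positions of the (j, i) and (i, j) entries in an n x n matrix,
# i.e. matching lower- and upper-triangular entries for the same pair.
lower <- ((i - 1) * n) + j
upper <- ((j - 1) * n) + i

M <- matrix(seq_len(n * n), n, n)
all(M[lower] == M[lower.tri(M)])        # TRUE: `lower` walks the lower triangle
all(M[upper] == t(M)[lower.tri(M)])     # TRUE: `upper` is the mirrored entry
```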
@@ -1,4 +1,4 @@
-#' Samples uniform from the Stiefel Manifold
+#' Samples uniform from the Stiefl Manifold.
 #'
 #' @param p row dim.
 #' @param q col dim.
@@ -10,6 +10,48 @@ rStiefl <- function(p, q) {
     return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))))
 }
 
+#' Retraction to the manifold.
+#'
+#' @param A matrix.
+#' @return `(p, q)` semi-orthogonal matrix, aka element of the Stiefl manifold.
+#' @keywords internal
+#' @export
+retractStiefl <- function(A) {
+    return(qr.Q(qr(A)))
+}
+
+#' Skew-Symmetric matrix computed from `A` as
+#' \eqn{1/2 (A - A^T)}.
+#' @param A Matrix of dim `(p, q)`
+#' @return Skew-Symmetric matrix of dim `(p, p)`.
+#' @keywords internal
+#' @export
+skew <- function(A) {
+    0.5 * (A - t(A))
+}
+
+#' Symmetric matrix computed from `A` as
+#' \eqn{1/2 (A + A^T)}.
+#' @param A Matrix of dim `(p, q)`
+#' @return Symmetric matrix of dim `(p, p)`.
+#' @keywords internal
+#' @export
+sym <- function(A) {
+    0.5 * (A + t(A))
+}
+
+#' Orthogonal Projection onto the tangent space of the stiefl manifold.
+#'
+#' @param V Point on the stiefl manifold.
+#' @param G matrix to be projected onto the tangent space at `V`.
+#' @return `(p, q)` matrix as element of the tangent space at `V`.
+#' @keywords internal
+#' @export
+projTangentStiefl <- function(V, G) {
+    Q <- diag(1, nrow(V)) - V %*% t(V)
+    return(Q %*% G + V %*% skew(t(V) %*% G))
+}
+
 #' Null space basis of given matrix `V`
 #'
 #' @param V `(p, q)` matrix
@@ -19,7 +61,7 @@ rStiefl <- function(p, q) {
 null <- function(V) {
     tmp <- qr(V)
     set <- if(tmp$rank == 0L) seq_len(ncol(V)) else -seq_len(tmp$rank)
-    return(qr.Q(tmp, complete=TRUE)[, set, drop=FALSE])
+    return(qr.Q(tmp, complete = TRUE)[, set, drop = FALSE])
 }
 
 #' Creates a (numeric) matrix where each column contains
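A quick sanity check of the manifold utilities added above (not part of the package; it only uses the functions in this hunk): the QR retraction returns a semi-orthogonal matrix, and the tangent-space projection `P` satisfies that `t(V) %*% P` is skew-symmetric, so `sym(t(V) %*% P)` is numerically zero.

```r
set.seed(1)
p <- 6; q <- 2
V <- rStiefl(p, q)                  # random point on the Stiefel manifold
G <- matrix(rnorm(p * q), p, q)     # arbitrary "gradient"

W <- retractStiefl(V + 0.1 * G)     # retraction back onto the manifold
all.equal(crossprod(W), diag(q))    # TRUE: columns of W are orthonormal

P <- projTangentStiefl(V, G)        # projection onto the tangent space at V
max(abs(sym(t(V) %*% P)))           # ~ 1e-16, i.e. t(V) %*% P is skew-symmetric
```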
@@ -5,10 +5,11 @@
 \alias{cve.call}
 \title{Implementation of the CVE method.}
 \usage{
-cve(formula, data, method = "simple", max.dim = 10, ...)
+cve(formula, data, method = "simple", max.dim = 10L, ...)
 
-cve.call(X, Y, method = "simple", nObs = nrow(X)^0.5, min.dim = 1,
-  max.dim = 10, k, ...)
+cve.call(X, Y, method = "simple", nObs = sqrt(nrow(X)), h = NULL,
+  min.dim = 1L, max.dim = 10L, k = NULL, tau = 1, tol = 0.001,
+  epochs = 50L, attempts = 10L, logger = NULL)
 }
 \arguments{
 \item{formula}{Formel for the regression model defining `X`, `Y`.
@@ -1,16 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/cve_linesearch.R
-\name{cve_linesearch}
-\alias{cve_linesearch}
-\title{Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe
-conditions.}
-\usage{
-cve_linesearch(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 1,
-  tol = 0.001, rho1 = 0.1, rho2 = 0.9, slack = 0, epochs = 50L,
-  attempts = 10L, max.linesearch.iter = 10L, logger = NULL)
-}
-\description{
-Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe
-conditions.
-}
-\keyword{internal}
@@ -1,16 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/cve_sgd.R
-\name{cve_sgd}
-\alias{cve_sgd}
-\title{Simple implementation of the CVE method. 'Simple' means that this method is
-a classic GD method unsing no further tricks.}
-\usage{
-cve_sgd(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 0.01,
-  tol = 0.001, epochs = 50L, batch.size = 16L, attempts = 10L,
-  logger = NULL)
-}
-\description{
-Simple implementation of the CVE method. 'Simple' means that this method is
-a classic GD method unsing no further tricks.
-}
-\keyword{internal}
@@ -1,16 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/cve_simple.R
-\name{cve_simple}
-\alias{cve_simple}
-\title{Simple implementation of the CVE method. 'Simple' means that this method is
-a classic GD method unsing no further tricks.}
-\usage{
-cve_simple(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 1,
-  tol = 0.001, slack = 0, epochs = 50L, attempts = 10L,
-  logger = NULL)
-}
-\description{
-Simple implementation of the CVE method. 'Simple' means that this method is
-a classic GD method unsing no further tricks.
-}
-\keyword{internal}
@@ -1,31 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/gradient.R
-\name{grad}
-\alias{grad}
-\title{Compute get gradient of `L(V)` given a dataset `X`.}
-\usage{
-grad(X, Y, V, h, loss.out = FALSE, loss.only = FALSE,
-  persistent = FALSE)
-}
-\arguments{
-\item{X}{Data matrix.}
-
-\item{Y}{Responce.}
-
-\item{V}{Position to compute the gradient at, aka point on Stiefl manifold.}
-
-\item{h}{Bandwidth}
-
-\item{loss.out}{Iff \code{TRUE} loss will be written to parent environment.}
-
-\item{loss.only}{Boolean to only compute the loss, of \code{TRUE} a single
-value loss is returned and \code{envir} is ignored.}
-
-\item{persistent}{Determines if data indices and dependent calculations shall
-be reused from the parent environment. ATTENTION: Do NOT set this flag, only
-intended for internal usage by carefully aligned functions!}
-}
-\description{
-Compute get gradient of `L(V)` given a dataset `X`.
-}
-\keyword{internal}
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/util.R
+\name{projTangentStiefl}
+\alias{projTangentStiefl}
+\title{Orthogonal Projection onto the tangent space of the stiefl manifold.}
+\usage{
+projTangentStiefl(V, G)
+}
+\arguments{
+\item{V}{Point on the stiefl manifold.}
+
+\item{G}{matrix to be projected onto the tangent space at `V`.}
+}
+\value{
+`(p, q)` matrix as element of the tangent space at `V`.
+}
+\description{
+Orthogonal Projection onto the tangent space of the stiefl manifold.
+}
+\keyword{internal}
@@ -2,7 +2,7 @@
 % Please edit documentation in R/util.R
 \name{rStiefl}
 \alias{rStiefl}
-\title{Samples uniform from the Stiefel Manifold}
+\title{Samples uniform from the Stiefl Manifold.}
 \usage{
 rStiefl(p, q)
 }
@@ -15,7 +15,7 @@ rStiefl(p, q)
 `(p, q)` semi-orthogonal matrix
 }
 \description{
-Samples uniform from the Stiefel Manifold
+Samples uniform from the Stiefl Manifold.
 }
 \examples{
 V <- rStiefel(6, 4)
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/util.R
+\name{retractStiefl}
+\alias{retractStiefl}
+\title{Retraction to the manifold.}
+\usage{
+retractStiefl(A)
+}
+\arguments{
+\item{A}{matrix.}
+}
+\value{
+`(p, q)` semi-orthogonal matrix, aka element of the Stiefl manifold.
+}
+\description{
+Retraction to the manifold.
+}
+\keyword{internal}
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/util.R
+\name{skew}
+\alias{skew}
+\title{Skew-Symmetric matrix computed from `A` as
+\eqn{1/2 (A - A^T)}.}
+\usage{
+skew(A)
+}
+\arguments{
+\item{A}{Matrix of dim `(p, q)`}
+}
+\value{
+Skew-Symmetric matrix of dim `(p, p)`.
+}
+\description{
+Skew-Symmetric matrix computed from `A` as
+\eqn{1/2 (A - A^T)}.
+}
+\keyword{internal}
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/util.R
+\name{sym}
+\alias{sym}
+\title{Symmetric matrix computed from `A` as
+\eqn{1/2 (A + A^T)}.}
+\usage{
+sym(A)
+}
+\arguments{
+\item{A}{Matrix of dim `(p, q)`}
+}
+\value{
+Symmetric matrix of dim `(p, p)`.
+}
+\description{
+Symmetric matrix computed from `A` as
+\eqn{1/2 (A + A^T)}.
+}
+\keyword{internal}
@@ -12,7 +12,7 @@ void cve_simple_sub(const int n, const int p, const int q,
                     double *V, double *L,
                     SEXP logger, SEXP loggerEnv) {
 
-    int attempt, epoch, i, j, k, nn = (n * (n - 1)) / 2;
+    int attempt, epoch, i, nn = (n * (n - 1)) / 2;
     double loss, loss_last, loss_best, err, tau;
     double tol = tol_init * sqrt((double)(2 * q));
     double gKscale = -0.5 / h;
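The `tol_init * sqrt(2 * q)` scaling (used identically in the R implementations in this commit) turns the break condition into a relative one: for any two `p x q` matrices `V`, `W` with orthonormal columns,

```latex
\lVert V V^\top - W W^\top \rVert_F^2
  \;=\; 2q - 2\,\lVert V^\top W \rVert_F^2 \;\le\; 2q ,
```

so the Frobenius error that is compared against the tolerance can never exceed \(\sqrt{2q}\), and the user-supplied `tol` is effectively a fraction of that maximal subspace distance.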
175
CVE_R/R/CVE.R
175
CVE_R/R/CVE.R
|
@ -10,6 +10,7 @@
|
||||||
#'
|
#'
|
||||||
#' @docType package
|
#' @docType package
|
||||||
#' @author Loki
|
#' @author Loki
|
||||||
|
#' @useDynLib CVE, .registration = TRUE
|
||||||
"_PACKAGE"
|
"_PACKAGE"
|
||||||
|
|
||||||
#' Implementation of the CVE method.
|
#' Implementation of the CVE method.
|
||||||
|
@@ -54,21 +55,21 @@
 #' @import stats
 #' @importFrom stats model.frame
 #' @export
-cve <- function(formula, data, method = "simple", max.dim = 10, ...) {
+cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
     # check for type of `data` if supplied and set default
     if (missing(data)) {
         data <- environment(formula)
     } else if (!is.data.frame(data)) {
-        stop('Parameter `data` must be a `data.frame` or missing.')
+        stop("Parameter 'data' must be a 'data.frame' or missing.")
     }
 
     # extract `X`, `Y` from `formula` with `data`
     model <- stats::model.frame(formula, data)
-    X <- as.matrix(model[,-1, drop = FALSE])
-    Y <- as.matrix(model[, 1, drop = FALSE])
+    X <- as.matrix(model[ ,-1L, drop = FALSE])
+    Y <- as.double(model[ , 1L])
 
     # pass extracted data on to [cve.call()]
-    dr <- cve.call(X, Y, method = method, ...)
+    dr <- cve.call(X, Y, method = method, max.dim = max.dim, ...)
 
     # overwrite `call` property from [cve.call()]
     dr$call <- match.call()
@@ -83,35 +84,81 @@ cve <- function(formula, data, method = "simple", max.dim = 10, ...) {
 #' @param ... Method specific parameters.
 #' @rdname cve
 #' @export
-cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5,
-                     min.dim = 1, max.dim = 10, k, ...) {
+cve.call <- function(X, Y, method = "simple",
+                     nObs = sqrt(nrow(X)), h = NULL,
+                     min.dim = 1L, max.dim = 10L, k = NULL,
+                     tau = 1.0, tol = 1e-3,
+                     epochs = 50L, attempts = 10L,
+                     logger = NULL) {
 
     # parameter checking
-    if (!is.matrix(X)) {
-        stop('X should be a matrices.')
+    if (!(is.matrix(X) && is.numeric(X))) {
+        stop("Parameter 'X' should be a numeric matrices.")
     }
-    if (is.matrix(Y)) {
-        Y <- as.vector(Y)
+    if (!is.numeric(Y)) {
+        stop("Parameter 'Y' must be numeric.")
+    }
+    if (is.matrix(Y) || !is.double(Y)) {
+        Y <- as.double(Y)
     }
     if (nrow(X) != length(Y)) {
-        stop('Rows of X and number of Y elements are not compatible.')
+        stop("Rows of 'X' and 'Y' elements are not compatible.")
     }
     if (ncol(X) < 2) {
-        stop('X is one dimensional, no need for dimension reduction.')
+        stop("'X' is one dimensional, no need for dimension reduction.")
     }
 
-    if (!missing(k)) {
-        min.dim <- as.integer(k)
-        max.dim <- as.integer(k)
-    } else {
+    if (missing(k) || is.null(k)) {
         min.dim <- as.integer(min.dim)
         max.dim <- as.integer(min(max.dim, ncol(X) - 1L))
+    } else {
+        min.dim <- as.integer(k)
+        max.dim <- as.integer(k)
     }
     if (min.dim > max.dim) {
-        stop('`min.dim` bigger `max.dim`.')
+        stop("'min.dim' bigger 'max.dim'.")
     }
     if (max.dim >= ncol(X)) {
-        stop('`max.dim` must be smaller than `ncol(X)`.')
+        stop("'max.dim' (or 'k') must be smaller than 'ncol(X)'.")
+    }
+
+    if (is.function(h)) {
+        estimate.bandwidth <- h
+        h <- NULL
+    }
+
+    if (!is.numeric(tau) || length(tau) > 1L || tau <= 0.0) {
+        stop("Initial step-width 'tau' must be positive number.")
+    } else {
+        tau <- as.double(tau)
+    }
+    if (!is.numeric(tol) || length(tol) > 1L || tol < 0.0) {
+        stop("Break condition tolerance 'tol' must be not negative number.")
+    } else {
+        tol <- as.double(tol)
+    }
+
+    if (!is.numeric(epochs) || length(epochs) > 1L) {
+        stop("Parameter 'epochs' must be positive integer.")
+    } else if (!is.integer(epochs)) {
+        epochs <- as.integer(epochs)
+    }
+    if (epochs < 1L) {
+        stop("Parameter 'epochs' must be at least 1L.")
+    }
+    if (!is.numeric(attempts) || length(attempts) > 1L) {
+        stop("Parameter 'attempts' must be positive integer.")
+    } else if (!is.integer(attempts)) {
+        attempts <- as.integer(attempts)
+    }
+    if (attempts < 1L) {
+        stop("Parameter 'attempts' must be at least 1L.")
+    }
+
+    if (is.function(logger)) {
+        loggerEnv <- environment(logger)
+    } else {
+        loggerEnv <- NULL
     }
 
     # Call specified method.
@@ -119,15 +166,40 @@ cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5,
     call <- match.call()
     dr <- list()
     for (k in min.dim:max.dim) {
 
+        if (missing(h) || is.null(h)) {
+            h <- estimate.bandwidth(X, k, nObs)
+        } else if (is.numeric(h) && h > 0.0) {
+            h <- as.double(h)
+        } else {
+            stop("Bandwidth 'h' must be positive numeric.")
+        }
+
         if (method == 'simple') {
-            dr.k <- cve_simple(X, Y, k, nObs = nObs, ...)
-        } else if (method == 'linesearch') {
-            dr.k <- cve_linesearch(X, Y, k, nObs = nObs, ...)
-        } else if (method == 'sgd') {
-            dr.k <- cve_sgd(X, Y, k, nObs = nObs, ...)
+            dr.k <- .Call('cve_simple', PACKAGE = 'CVE',
+                          X, Y, k, h,
+                          tau, tol,
+                          epochs, attempts,
+                          logger, loggerEnv)
+        # dr.k <- cve_simple(X, Y, k, nObs = nObs, ...)
+        # } else if (method == 'linesearch') {
+        #     dr.k <- cve_linesearch(X, Y, k, nObs = nObs, ...)
+        # } else if (method == 'rcg') {
+        #     dr.k <- cve_rcg(X, Y, k, nObs = nObs, ...)
+        # } else if (method == 'momentum') {
+        #     dr.k <- cve_momentum(X, Y, k, nObs = nObs, ...)
+        # } else if (method == 'rmsprob') {
+        #     dr.k <- cve_rmsprob(X, Y, k, nObs = nObs, ...)
+        # } else if (method == 'sgdrmsprob') {
+        #     dr.k <- cve_sgdrmsprob(X, Y, k, nObs = nObs, ...)
+        # } else if (method == 'sgd') {
+        #     dr.k <- cve_sgd(X, Y, k, nObs = nObs, ...)
         } else {
             stop('Got unknown method.')
         }
+        dr.k$B <- null(dr.k$V)
+        dr.k$loss <- mean(dr.k$L)
+        dr.k$h <- h
         dr.k$k <- k
         class(dr.k) <- "cve.k"
        dr[[k]] <- dr.k
|
||||||
return(dr)
|
return(dr)
|
||||||
}
|
}
|
||||||
|
|
||||||
# TODO: write summary
|
|
||||||
# summary.cve <- function() {
|
|
||||||
# # code #
|
|
||||||
# }
|
|
||||||
|
|
||||||
#' Ploting helper for objects of class \code{cve}.
|
#' Ploting helper for objects of class \code{cve}.
|
||||||
#'
|
#'
|
||||||
#' @param x Object of class \code{cve} (result of [cve()]).
|
#' @param x Object of class \code{cve} (result of [cve()]).
|
||||||
|
@@ -164,43 +231,19 @@ cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5,
 #' @method plot cve
 #' @export
 plot.cve <- function(x, ...) {
-    # H <- x$history
-    # H_1 <- H[!is.na(H[, 1]), 1]
-
-    # defaults <- list(
-    #     main = "History",
-    #     xlab = "Iterations i",
-    #     ylab = expression(loss == L[n](V^{(i)})),
-    #     xlim = c(1, nrow(H)),
-    #     ylim = c(0, max(H)),
-    #     type = "l"
-    # )
-
-    # call.plot <- match.call()
-    # keys <- names(defaults)
-    # keys <- keys[match(keys, names(call.plot)[-1], nomatch = 0) == 0]
-
-    # for (key in keys) {
-    #     call.plot[[key]] <- defaults[[key]]
-    # }
-
-    # call.plot[[1L]] <- quote(plot)
-    # call.plot$x <- quote(1:length(H_1))
-    # call.plot$y <- quote(H_1)
-
-    # eval(call.plot)
-
-    # if (ncol(H) > 1) {
-    #     for (i in 2:ncol(H)) {
-    #         H_i <- H[H[, i] > 0, i]
-    #         lines(1:length(H_i), H_i)
-    #     }
-    # }
-    # x.ends <- apply(H, 2, function(h) { length(h[!is.na(h)]) })
-    # y.ends <- apply(H, 2, function(h) { tail(h[!is.na(h)], n=1) })
-    # points(x.ends, y.ends)
+    L <- c()
+    k <- c()
+    for (dr.k in x) {
+        if (class(dr.k) == 'cve.k') {
+            k <- c(k, paste0(dr.k$k))
+            L <- c(L, dr.k$L)
+        }
+    }
+    L <- matrix(L, ncol = length(k))
+    boxplot(L, main = "Loss ...",
+            xlab = "SDR dimension k",
+            ylab = expression(L(V, X[i])),
+            names = k)
 }
 
 #' Prints a summary of a \code{cve} result.
@@ -1,17 +1,19 @@
-#' Simple implementation of the CVE method. 'Simple' means that this method is
-#' a classic GD method unsing no further tricks.
+#' Implementation of the CVE method as a Riemann Conjugated Gradient method.
 #'
+#' @references A Riemannian Conjugate Gradient Algorithm with Implicit Vector
+#' Transport for Optimization on the Stiefel Manifold
 #' @keywords internal
 #' @export
-cve_simple <- function(X, Y, k,
-                       nObs = sqrt(nrow(X)),
-                       h = NULL,
-                       tau = 1.0,
-                       tol = 1e-3,
-                       slack = 0,
-                       epochs = 50L,
-                       attempts = 10L,
-                       logger = NULL
+cve_momentum <- function(X, Y, k,
+                         nObs = sqrt(nrow(X)),
+                         h = NULL,
+                         tau = 1.0,
+                         tol = 1e-4,
+                         rho = 0.1, # Momentum update.
+                         slack = 0,
+                         epochs = 50L,
+                         attempts = 10L,
+                         logger = NULL
 ) {
     # Set `grad` functions environment to enable if to find this environments
     # local variabels, needed to enable the manipulation of this local variables
@@ -67,9 +69,6 @@ cve_simple <- function(X, Y, k,
         # Set last loss (aka, loss after applying the step).
         loss.last <- loss
 
-        # Cayley transform matrix `A`
-        A <- (G %*% t(V)) - (V %*% t(G))
-
         # Call logger with initial values before starting optimization.
         if (is.function(logger)) {
             epoch <- 0 # Set epoch count to 0 (only relevant for logging).
@@ -77,12 +76,15 @@ cve_simple <- function(X, Y, k,
             logger(environment())
         }
 
+        M <- matrix(0, p, q)
         ## Start optimization loop.
         for (epoch in 1:epochs) {
             # Apply learning rate `tau`.
-            A.tau <- tau * A
+            A <- projTangentStiefl(V, G)
+            # Momentum update.
+            M <- A + rho * projTangentStiefl(V, M)
             # Parallet transport (on Stiefl manifold) into direction of `G`.
-            V.tau <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)
+            V.tau <- retractStiefl(V - tau * M)
 
             # Loss at position after a step.
             loss <- grad(X, Y, V.tau, h, loss.only = TRUE, persistent = TRUE)
@@ -119,9 +121,6 @@ cve_simple <- function(X, Y, k,
 
             # Compute gradient at new position.
             G <- grad(X, Y, V, h, persistent = TRUE)
-
-            # Cayley transform matrix `A`
-            A <- (G %*% t(V)) - (V %*% t(G))
         }
 
         # Check if current attempt improved previous ones
@@ -129,7 +128,6 @@ cve_simple <- function(X, Y, k,
             loss.best <- loss
             V.best <- V
         }
-
     }
 
     return(list(
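In this rewrite the Cayley step of the old `cve_simple` is replaced by a projected-gradient step with momentum. Per epoch the update performed by the code above reads:

```latex
A_t = P_{T_{V_t}}(G_t), \qquad
M_t = A_t + \rho\, P_{T_{V_t}}(M_{t-1}), \qquad
V_{t+1} = \operatorname{Retr}\bigl(V_t - \tau\, M_t\bigr),
```

where \(P_{T_V}(G) = (I - V V^\top) G + V\,\mathrm{skew}(V^\top G)\) is `projTangentStiefl` and \(\operatorname{Retr}\) is the QR retraction `retractStiefl` from `util.R`.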
@@ -0,0 +1,179 @@
+#' Implementation of the CVE method as a Riemann Conjugated Gradient method.
+#'
+#' @references A Riemannian Conjugate Gradient Algorithm with Implicit Vector
+#' Transport for Optimization on the Stiefel Manifold
+#' @keywords internal
+#' @export
+cve_rcg <- function(X, Y, k,
+                    nObs = sqrt(nrow(X)),
+                    h = NULL,
+                    tau = 1.0,
+                    tol = 1e-4,
+                    rho = 1e-4, # For Armijo condition.
+                    slack = 0,
+                    epochs = 50L,
+                    attempts = 10L,
+                    max.linesearch.iter = 20L,
+                    logger = NULL
+) {
+    # Set `grad` functions environment to enable if to find this environments
+    # local variabels, needed to enable the manipulation of this local variables
+    # from within `grad`.
+    environment(grad) <- environment()
+
+    # Get dimensions.
+    n <- nrow(X) # Number of samples.
+    p <- ncol(X) # Data dimensions
+    q <- p - k   # Complement dimension of the SDR space.
+
+    # Save initial learning rate `tau`.
+    tau.init <- tau
+    # Addapt tolearance for break condition.
+    tol <- sqrt(2 * q) * tol
+
+    # Estaimate bandwidth if not given.
+    if (missing(h) || !is.numeric(h)) {
+        h <- estimate.bandwidth(X, k, nObs)
+    }
+
+    # Compute persistent data.
+    # Compute lookup indexes for symmetrie, lower/upper
+    # triangular parts and vectorization.
+    pair.index <- elem.pairs(seq(n))
+    i <- pair.index[1, ]  # `i` indices of `(i, j)` pairs
+    j <- pair.index[2, ]  # `j` indices of `(i, j)` pairs
+    # Index of vectorized matrix, for lower and upper triangular part.
+    lower <- ((i - 1) * n) + j
+    upper <- ((j - 1) * n) + i
+
+    # Create all pairewise differences of rows of `X`.
+    X_diff <- X[i, , drop = F] - X[j, , drop = F]
+    # Identity matrix.
+    I_p <- diag(1, p)
+
+    # Init tracking of current best (according multiple attempts).
+    V.best <- NULL
+    loss.best <- Inf
+
+    # Start loop for multiple attempts.
+    for (attempt in 1:attempts) {
+        # Reset learning rate `tau`.
+        tau <- tau.init
+
+        # Sample a `(p, q)` dimensional matrix from the stiefel manifold as
+        # optimization start value.
+        V <- rStiefl(p, q)
+
+        # Initial loss and gradient.
+        loss <- Inf
+        G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
+        # Set last loss (aka, loss after applying the step).
+        loss.last <- loss
+
+        # Cayley transform matrix `A`
+        A <- (G %*% t(V)) - (V %*% t(G))
+        A.last <- A
+
+        W <- -A
+        Z <- W %*% V
+
+        # Compute directional derivative.
+        loss.prime <- sum(G * Z) # Tr(G^T Z)
+
+        # Call logger with initial values before starting optimization.
+        if (is.function(logger)) {
+            epoch <- 0 # Set epoch count to 0 (only relevant for logging).
+            error <- NA
+            logger(environment())
+        }
+
+        ## Start optimization loop.
+        for (epoch in 1:epochs) {
+            # New directional derivative.
+            loss.prime <- sum(G * Z)
+
+            # Reset `tau` for step-size selection.
+            tau <- tau.init
+            for (iter in 1:max.linesearch.iter) {
+                V.tau <- retractStiefl(V + tau * Z)
+                # Loss at position after a step.
+                loss <- grad(X, Y, V.tau, h,
+                             loss.only = TRUE, persistent = TRUE)
+                # Check Armijo condition.
+                if (loss <= loss.last + (rho * tau * loss.prime)) {
+                    break() # Iff fulfilled stop linesearch.
+                }
+                # Reduce step-size and continue linesearch.
+                tau <- tau / 2
+            }
+
+            # Compute error.
+            error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
+
+            # Perform step with found step-size
+            V <- V.tau
+            loss.last <- loss
+
+            # Call logger.
+            if (is.function(logger)) {
+                logger(environment())
+            }
+
+            # Check break condition.
+            # Note: the devision by `sqrt(2 * k)` is included in `tol`.
+            if (error < tol) {
+                break()
+            }
+
+            # Compute Gradient at new position.
+            G <- grad(X, Y, V, h, persistent = TRUE)
+            # Store last `A` for `beta` computation.
+            A.last <- A
+            # Cayley transform matrix `A`
+            A <- (G %*% t(V)) - (V %*% t(G))
+
+            # Check 2. break condition.
+            if (norm(A, type = 'F') < tol) {
+                break()
+            }
+
+            # New directional derivative.
+            loss.prime <- sum(G * Z)
+
+            # Reset beta if needed.
+            if (loss.prime < 0) {
+                # Compute `beta` as describet in paper.
+                beta.FR <- (norm(A, type = 'F') / norm(A.last, type = 'F'))^2
+                beta.PR <- sum(A * (A - A.last)) / norm(A.last, type = 'F')^2
+                if (beta.PR < -beta.FR) {
+                    beta <- -beta.FR
+                } else if (abs(beta.PR) < beta.FR) {
+                    beta <- beta.PR
+                } else if (beta.PR > beta.FR) {
+                    beta <- beta.FR
+                } else {
+                    beta <- 0
+                }
+            } else {
+                beta <- 0
+            }
+
+            # Update direction.
+            W <- -A + beta * W
+            Z <- W %*% V
+        }
+
+        # Check if current attempt improved previous ones
+        if (loss < loss.best) {
+            loss.best <- loss
+            V.best <- V
+        }
+    }
+
+    return(list(
+        loss = loss.best,
+        V = V.best,
+        B = null(V.best),
+        h = h
+    ))
+}
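The `beta` computed inside `cve_rcg` is the clipped Fletcher-Reeves / Polak-Ribière hybrid, exactly as coded above:

```latex
\beta^{FR} = \frac{\lVert A_t \rVert_F^2}{\lVert A_{t-1} \rVert_F^2}, \qquad
\beta^{PR} = \frac{\langle A_t,\; A_t - A_{t-1} \rangle_F}{\lVert A_{t-1} \rVert_F^2}, \qquad
\beta = \begin{cases}
  -\beta^{FR} & \text{if } \beta^{PR} < -\beta^{FR}, \\
  \beta^{PR}  & \text{if } |\beta^{PR}| < \beta^{FR}, \\
  \beta^{FR}  & \text{if } \beta^{PR} > \beta^{FR}, \\
  0           & \text{otherwise (and whenever } L'(0) \ge 0),
\end{cases}
```

followed by the direction update \(W \leftarrow -A + \beta W\) and \(Z = W V\), which is the search curve fed into the Armijo backtracking step above.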
@@ -0,0 +1,121 @@
+#' Implementation of the CVE method as a Riemann Conjugated Gradient method.
+#'
+#' @references A Riemannian Conjugate Gradient Algorithm with Implicit Vector
+#' Transport for Optimization on the Stiefel Manifold
+#' @keywords internal
+#' @export
+cve_rmsprob <- function(X, Y, k,
+                        nObs = sqrt(nrow(X)),
+                        h = NULL,
+                        tau = 0.1,
+                        tol = 1e-4,
+                        rho = 0.1, # Momentum update.
+                        slack = 0,
+                        epochs = 50L,
+                        attempts = 10L,
+                        epsilon = 1e-7,
+                        max.linesearch.iter = 20L,
+                        logger = NULL
+) {
+    # Set `grad` functions environment to enable if to find this environments
+    # local variabels, needed to enable the manipulation of this local variables
+    # from within `grad`.
+    environment(grad) <- environment()
+
+    # Get dimensions.
+    n <- nrow(X) # Number of samples.
+    p <- ncol(X) # Data dimensions
+    q <- p - k   # Complement dimension of the SDR space.
+
+    # Save initial learning rate `tau`.
+    tau.init <- tau
+    # Addapt tolearance for break condition.
+    tol <- sqrt(2 * q) * tol
+
+    # Estaimate bandwidth if not given.
+    if (missing(h) || !is.numeric(h)) {
+        h <- estimate.bandwidth(X, k, nObs)
+    }
+
+    # Compute persistent data.
+    # Compute lookup indexes for symmetrie, lower/upper
+    # triangular parts and vectorization.
+    pair.index <- elem.pairs(seq(n))
+    i <- pair.index[1, ]  # `i` indices of `(i, j)` pairs
+    j <- pair.index[2, ]  # `j` indices of `(i, j)` pairs
+    # Index of vectorized matrix, for lower and upper triangular part.
+    lower <- ((i - 1) * n) + j
+    upper <- ((j - 1) * n) + i
+
+    # Create all pairewise differences of rows of `X`.
+    X_diff <- X[i, , drop = F] - X[j, , drop = F]
+    # Identity matrix.
+    I_p <- diag(1, p)
+
+    # Init tracking of current best (according multiple attempts).
+    V.best <- NULL
+    loss.best <- Inf
+
+    # Start loop for multiple attempts.
+    for (attempt in 1:attempts) {
+        # Sample a `(p, q)` dimensional matrix from the stiefel manifold as
+        # optimization start value.
+        V <- rStiefl(p, q)
+
+        # Call logger with initial values before starting optimization.
+        if (is.function(logger)) {
+            loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
+            epoch <- 0 # Set epoch count to 0 (only relevant for logging).
+            error <- NA
+            logger(environment())
+        }
+
+        M <- matrix(0, p, q)
+        ## Start optimization loop.
+        for (epoch in 1:epochs) {
+            # Compute gradient and loss at current position.
+            loss <- Inf
+            G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
+            # Projectd Gradient.
+            A <- projTangentStiefl(V, G)
+            # Projected element squared gradient.
+            Asq <- projTangentStiefl(V, G * G)
+            # Momentum update.
+            M <- (1 - rho) * Asq + rho * projTangentStiefl(V, M)
+            # Parallet transport (on Stiefl manifold) into direction of `G`.
+            V.tau <- retractStiefl(V - tau.init * A / (sqrt(abs(M)) + epsilon))
+
+            # Compute error.
+            error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
+
+            # Perform step.
+            V <- V.tau
+
+            # Call logger after taking a step.
+            if (is.function(logger)) {
+                # Set tau to an step size estimate (only for logging)
+                tau <- tau.init / mean(sqrt(abs(M)) + epsilon)
+                logger(environment())
+            }
+
+            # Check break condition.
+            # Note: the devision by `sqrt(2 * k)` is included in `tol`.
+            if (error < tol) {
+                break()
+            }
+        }
+
+        # Check if current attempt improved previous ones
+        if (loss < loss.best) {
+            loss.best <- loss
+            V.best <- V
+        }
+    }
+
+    return(list(
+        loss = loss.best,
+        V = V.best,
+        B = null(V.best),
+        h = h
+    ))
+}
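The per-epoch update of `cve_rmsprob` is an RMSProp-style step carried out in the tangent space and retracted back onto the manifold; in the notation of the code above:

```latex
A_t = P_{T_{V_t}}(G_t), \qquad
M_t = (1-\rho)\, P_{T_{V_t}}(G_t \circ G_t) + \rho\, P_{T_{V_t}}(M_{t-1}), \qquad
V_{t+1} = \operatorname{Retr}\!\Bigl(V_t - \tau\, A_t \oslash \bigl(\sqrt{|M_t|} + \varepsilon\bigr)\Bigr),
```

with \(\circ\) / \(\oslash\) the element-wise product and quotient. The absolute value under the square root matches the `sqrt(abs(M))` in the code: unlike in plain RMSProp, the tangent-space projection of the element-wise squared gradient need not have non-negative entries.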
@@ -3,14 +3,16 @@
 #'
 #' @keywords internal
 #' @export
-cve_sgd <- function(X, Y, k,
+cve_sgdrmsprob <- function(X, Y, k,
                     nObs = sqrt(nrow(X)),
                     h = NULL,
-                    tau = 0.01,
-                    tol = 1e-3,
+                    tau = 0.1,
+                    tol = 1e-4,
+                    rho = 0.1,
                     epochs = 50L,
                     batch.size = 16L,
                     attempts = 10L,
+                    epsilon = 1e-7,
                     logger = NULL
 ) {
     # Set `grad` functions environment to enable if to find this environments
@@ -72,6 +74,7 @@ cve_sgd <- function(X, Y, k,
             logger(environment())
         }
 
+        M <- matrix(0, p, q)
         # Repeat `epochs` times
         for (epoch in 1:epochs) {
             # Shuffle batches
@@ -87,13 +90,14 @@ cve_sgd <- function(X, Y, k,
                 loss <- NULL
                 G <- grad(X[batch, ], Y[batch], V, h, loss.out = TRUE)
 
-                # Cayley transform matrix.
-                A <- (G %*% t(V)) - (V %*% t(G))
-
-                # Apply learning rate `tau`.
-                A.tau <- tau * A
+                # Projectd Gradient.
+                A <- projTangentStiefl(V, G)
+                # Projected element squared gradient.
+                Asq <- projTangentStiefl(V, G * G)
+                # Momentum update.
+                M <- (1 - rho) * Asq + rho * projTangentStiefl(V, M)
                 # Parallet transport (on Stiefl manifold) into direction of `G`.
-                V <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)
+                V <- retractStiefl(V - tau.init * A / (sqrt(abs(M)) + epsilon))
             }
             # And the error for the history.
             error <- norm(V.last %*% t(V.last) - V %*% t(V), type = "F")
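The "Shuffle batches" step referenced above is outside this hunk; a hypothetical sketch of what such a step typically looks like (the variable names `perm` and `batches` are illustrative, not taken from the package):

```r
# Split a random permutation of the sample indices into mini-batches of size
# `batch.size`; each `batch` then indexes X[batch, ] and Y[batch] in the loop.
n <- 100
batch.size <- 16L
perm <- sample(n)
batches <- split(perm, ceiling(seq_along(perm) / batch.size))
```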
@@ -72,9 +72,7 @@ cve_simple <- function(X, Y, k,
 
         # Call logger with initial values before starting optimization.
         if (is.function(logger)) {
-            epoch <- 0 # Set epoch count to 0 (only relevant for logging).
-            error <- NA
-            logger(environment())
+            logger(0L, attempt, loss, V, tau)
         }
 
         ## Start optimization loop.
@@ -103,7 +101,7 @@ cve_simple <- function(X, Y, k,
                 V <- V.tau
                 # Call logger last time befor stoping.
                 if (is.function(logger)) {
-                    logger(environment())
+                    logger(epoch, attempt, loss, V, tau)
                 }
                 break()
             }
@@ -114,7 +112,7 @@ cve_simple <- function(X, Y, k,
 
             # Call logger after taking a step.
            if (is.function(logger)) {
-                logger(environment())
+                logger(epoch, attempt, loss, V, tau)
             }
 
             # Compute gradient at new position.
@@ -1,4 +1,4 @@
-#' Samples uniform from the Stiefel Manifold
+#' Samples uniform from the Stiefl Manifold.
 #'
 #' @param p row dim.
 #' @param q col dim.
@@ -10,6 +10,48 @@ rStiefl <- function(p, q) {
     return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))))
 }
 
+#' Retraction to the manifold.
+#'
+#' @param A matrix.
+#' @return `(p, q)` semi-orthogonal matrix, aka element of the Stiefl manifold.
+#' @keywords internal
+#' @export
+retractStiefl <- function(A) {
+    return(qr.Q(qr(A)))
+}
+
+#' Skew-Symmetric matrix computed from `A` as
+#' \eqn{1/2 (A - A^T)}.
+#' @param A Matrix of dim `(p, q)`
+#' @return Skew-Symmetric matrix of dim `(p, p)`.
+#' @keywords internal
+#' @export
+skew <- function(A) {
+    0.5 * (A - t(A))
+}
+
+#' Symmetric matrix computed from `A` as
+#' \eqn{1/2 (A + A^T)}.
+#' @param A Matrix of dim `(p, q)`
+#' @return Symmetric matrix of dim `(p, p)`.
+#' @keywords internal
+#' @export
+sym <- function(A) {
+    0.5 * (A + t(A))
+}
+
+#' Orthogonal Projection onto the tangent space of the stiefl manifold.
+#'
+#' @param V Point on the stiefl manifold.
+#' @param G matrix to be projected onto the tangent space at `V`.
+#' @return `(p, q)` matrix as element of the tangent space at `V`.
+#' @keywords internal
+#' @export
+projTangentStiefl <- function(V, G) {
+    Q <- diag(1, nrow(V)) - V %*% t(V)
+    return(Q %*% G + V %*% skew(t(V) %*% G))
+}
+
 #' Null space basis of given matrix `V`
 #'
 #' @param V `(p, q)` matrix
@@ -0,0 +1,113 @@
+
+#C:\Users\Lukas\Desktop\owncloud\Shared\Lukas\CVE
+install.packages("C:/Users/Lukas/Desktop/owncloud/Shared/Lukas/CVE_1.0.tar.gz", repos=NULL, type="source")
+install.packages(file.choose(), repos=NULL, type="source")
+
+dim<-12
+N<-100
+s<-0.5
+dat<-creat_sample(rep(1,dim)/sqrt(dim),N,fsquare,0.5)
+test<-cve(Y~.,data=as.data.frame(dat),k=1)
+##############
+#initialize model parameterss
+m<-100 #number of replications in simulation
+dim<-12 #dimension of random variable X
+truedim<-2 #dimension of B=b
+qs<-dim-truedim # dimension of orthogonal complement of B
+b1=c(1,1,1,1,1,1,0,0,0,0,0,0)/sqrt(6)
+b2=c(1,-1,1,-1,1,-1,0,0,0,0,0,0)/sqrt(6)
+b<-cbind(b1,b2)
+#b<-b1
+P<-b%*%t(b)
+sigma=0.5 #error standard deviation
+N<-70 #sample size
+K<-30 #number of arbitrary starting values for curvilinear optimization
+MAXIT<-30 #maximal number of iterations in curvilinear search algorithm
+var_vec<-mat.or.vec(m,12)
+M1_weight<-mat.or.vec(m,13)
+#colnames(M1_weight)<-c('CVE1','CVE2','CVE3','CVE1_Rcpp','CVE2_Rcpp','CVE3_Rcpp','meanMAVE','csMAVE','phd','sir','save','CVE4')
+#link function for M1
+fM1<-function(x){return(x[1]/(0.5+(x[2]+1.5)^2))}
+for (i in 1:m){
+    #generate dat according to M1
+    dat<-creat_sample_nor_nonstand(b,N,fsquare,diag(rep(1,dim)),sigma)
+    #est sample covariance matrix
+    Sig_est<-est_varmat(dat[,-1])
+    #est trace of sample covariance matrix
+    tr<-var_tr(Sig_est)
+
+    #calculates Vhat_k for CVE1,CVE2, CVE3 for k=qs
+    CVE1<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0)
+    CVE2<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0)
+    CVE3<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0)
+
+    CVE4<-stiefl_weight_partial_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0)
+    CVE5<-stiefl_weight_partial_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0)
+    CVE6<-stiefl_weight_partial_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0)
+    #
+    CVE7<-stiefl_weight_full_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0)
+    CVE8<-stiefl_weight_full_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0)
+    CVE9<-stiefl_weight_full_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0)
+    #
+    #
+    var_vec[i,1]<-CVE1$var
+    var_vec[i,2]<-CVE2$var
+    var_vec[i,3]<-CVE3$var
+
+    var_vec[i,4]<-CVE4$var
+    var_vec[i,5]<-CVE5$var
+    var_vec[i,6]<-CVE6$var
+    #
+    var_vec[i,7]<-CVE7$var
+    var_vec[i,8]<-CVE8$var
+    var_vec[i,9]<-CVE9$var
+
+    CVE1$est_base<-fill_base(CVE1$est_base)
+    CVE2$est_base<-fill_base(CVE2$est_base)
+    CVE3$est_base<-fill_base(CVE3$est_base)
+    CVE4$est_base<-fill_base(CVE4$est_base)
+    CVE5$est_base<-fill_base(CVE5$est_base)
+    CVE6$est_base<-fill_base(CVE6$est_base)
+    CVE7$est_base<-fill_base(CVE7$est_base)
+    CVE8$est_base<-fill_base(CVE8$est_base)
+    CVE9$est_base<-fill_base(CVE9$est_base)
+
+    # calculate distance between true B and estimated B
+    M1_weight[i,1]<-subspace_dist(CVE1$est_base[,1:truedim],b)
+    M1_weight[i,2]<-subspace_dist(CVE2$est_base[,1:truedim],b)
+    M1_weight[i,3]<-subspace_dist(CVE3$est_base[,1:truedim],b)
+    M1_weight[i,4]<-subspace_dist(CVE4$est_base[,1:truedim],b)
+    M1_weight[i,5]<-subspace_dist(CVE5$est_base[,1:truedim],b)
+    M1_weight[i,6]<-subspace_dist(CVE6$est_base[,1:truedim],b)
+    M1_weight[i,7]<-subspace_dist(CVE7$est_base[,1:truedim],b)
+    M1_weight[i,8]<-subspace_dist(CVE8$est_base[,1:truedim],b)
+    M1_weight[i,9]<-subspace_dist(CVE9$est_base[,1:truedim],b)
+
+
+
+    CVE1_Rcpp<-cve(Y~.,data=as.data.frame(dat),k=truedim,nObs=N^0.8,attempts=K,tol=10^(-3),slack=0)[[2]]
+    CVE2_Rcpp<-cve(Y~.,data=as.data.frame(dat),k=truedim,nObs=N^(2/3),attempts=K,tol=10^(-3),slack=0)[[2]]
+    CVE3_Rcpp<-cve(Y~.,data=as.data.frame(dat),k=truedim,nObs=N^0.5,attempts=K,tol=10^(-3),slack=0)[[2]]
+
+    # CVE4_Rcpp<-cve(Y~.,data=as.data.frame(dat),k=truedim,h=h_opt,attempts=K,tol=10^(-3))[[2]]
+    #M1_Rcpp[i,12]<-subspace_dist(CVE4_Rcpp$B,b)
+
+    var_vec[i,10]<-CVE1_Rcpp$loss
+    var_vec[i,11]<-CVE2_Rcpp$loss
+    var_vec[i,12]<-CVE3_Rcpp$loss
+
+    #calculate orthogonal complement of Vhat_k
+    #i.e. CVE1$est_base[,1:truedim] is estimator for B with dimension (dim times (dim-qs))
+
+
+    M1_weight[i,10]<-subspace_dist(CVE1_Rcpp$B,b)
+    M1_weight[i,11]<-subspace_dist(CVE2_Rcpp$B,b)
+    M1_weight[i,12]<-subspace_dist(CVE3_Rcpp$B,b)
+    #meanMAVE
+    mod_t2<-mave(Y~.,data=as.data.frame(dat),method = 'meanMAVE')
+    M1_weight[i,13]<-subspace_dist(mod_t2$dir[[truedim]],b)
+
+    print(paste(i,paste('/',m)))
+}
+boxplot(M1_weight[1:(i-1),]/sqrt(2*truedim),names=colnames(M1_weight),ylab='err',main='M1')
+summary(M1_weight[1:(i-1),])
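The helper `subspace_dist` used by this benchmark script is not part of this commit; a sketch consistent with the error measure used by the optimizers above (Frobenius distance between projection matrices, which is also what the `/ sqrt(2 * truedim)` normalization in the boxplot suggests) would be:

```r
# Assumed (not from the package): distance between the column spans of A and B
# as the Frobenius norm of the difference of their orthogonal projections.
subspace_dist <- function(A, B) {
    P.A <- tcrossprod(qr.Q(qr(A)))   # projection onto span(A)
    P.B <- tcrossprod(qr.Q(qr(B)))   # projection onto span(B)
    norm(P.A - P.B, type = "F")
}
# With this definition the distance between two k-dimensional subspaces is at
# most sqrt(2 * k), matching the normalization by sqrt(2 * truedim) above.
```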
@ -0,0 +1,222 @@
|
||||||
|
# Weighted CVE loss ('partial' weighting variant): returns the weighted average of the
# local conditional variances sig over the q reference points and, if grad = TRUE, its
# gradient with respect to V.
#   V     ... matrix with k orthonormal columns (a vector is treated as k = 1)
#   Xl    ... stacked pairwise differences of the predictors (N*q rows)
#   dtemp ... norm2() of the rows of Xl, precomputed by the caller
#   h     ... bandwidth, q ... number of reference points, Y ... response vector
LV_weight_partial<-function(V,Xl,dtemp,h,q,Y,grad=T){
  N<-length(Y)
  if(is.vector(V)){ k<-1 } else { k<-length(V[1,]) }
  Xlv<-Xl%*%V
  d<-dtemp-((Xlv^2)%*%rep(1,k))
  w<-exp(-0.5*(d/h)^2) # dnorm(d/h)/dnorm(0)
  w<-matrix(w,N,q)
  wn<-apply(w,2,sum) # new
  w<-apply(w,2,column_normalize)
  mY<-t(w)%*%Y
  sig<-t(w)%*%(Y^2)-(mY)^2
  W<-diag(kronecker(t(wn),rep(1,N))) ## new
  if(grad==T){
    grad<-mat.or.vec(ncol(Xl),k) # predictor dimension taken from Xl (avoids the free variable `dim`)
    tmp1<-(kronecker(sig,rep(1,N))-(as.vector(kronecker(rep(1,q),Y))-kronecker(mY,rep(1,N)))^2)
    if(k==1){
      grad_d<- -2*Xl*as.vector(Xlv)
      grad<-(1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1 # new
      # wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N))
      # grad<- wn_grad%*%(sig-rep(var1[2],q))/(sum(wn))+grad
    } else {
      for (j in 1:k){
        grad_d<- -2*Xl*as.vector(Xlv[,j])
        grad[,j]<-(1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1 # new
        # wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N))
        # grad[,j]<- wn_grad%*%(sig-rep(var1[2],q))/(sum(wn))+grad
      }
    }
    ret<-list(t(wn)%*%sig/sum(wn),sig,grad) # new
    names(ret)<-c('var','sig','grad')
  } else {
    ret<-list(t(wn)%*%sig/sum(wn),sig) # new
    names(ret)<-c('var','sig')
  }
  return(ret)
}
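LV_weight_partial and LV_weight_full below assume two small helpers, column_normalize and norm2, defined elsewhere in these scripts. Hedged sketches consistent with how they are used here (column-wise normalization of the kernel weights, so that t(w) %*% Y gives locally weighted means, and row-wise squared norms of Xl); the repository's own definitions take precedence:

# Hypothetical helper sketches, inferred from their usage above.
column_normalize <- function(x) x / sum(x)  # rescale a weight vector to sum to one
norm2 <- function(x) sum(x^2)               # squared Euclidean norm of a vector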
######
# Weighted CVE loss ('full' weighting variant): same quantities as LV_weight_partial,
# but the weight factor W is kept as the full N x q matrix and the gradient includes
# the extra wn_grad term coming from differentiating the column sums wn.
LV_weight_full<-function(V,Xl,dtemp,h,q,Y,grad=T){
  N<-length(Y)
  if(is.vector(V)){ k<-1 } else { k<-length(V[1,]) }
  Xlv<-Xl%*%V
  d<-dtemp-((Xlv^2)%*%rep(1,k))
  w<-exp(-0.5*(d/h)^2) # dnorm(d/h)/dnorm(0)
  w<-matrix(w,N,q)
  wn<-apply(w,2,sum) # new
  w<-apply(w,2,column_normalize)
  mY<-t(w)%*%Y
  sig<-t(w)%*%(Y^2)-(mY)^2
  W<-kronecker(t(wn),rep(1,N)) # new, or ### diag(kronecker(t(wn),rep(1,N)))
  var<-t(wn)%*%sig/sum(wn) # new
  if(grad==T){
    grad<-mat.or.vec(ncol(Xl),k) # predictor dimension taken from Xl (avoids the free variable `dim`)
    tmp1<-(kronecker(sig,rep(1,N))-(as.vector(kronecker(rep(1,q),Y))-kronecker(mY,rep(1,N)))^2)
    if(k==1){
      grad_d<- -2*Xl*as.vector(Xlv)
      grad<-(1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1 # new
      wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N)) # new
      grad<- wn_grad%*%(sig-rep(var,q))/(sum(wn))+grad # new
    } else {
      for (j in 1:k){
        grad_d<- -2*Xl*as.vector(Xlv[,j])
        grad[,j]<-(1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1 # new
        wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N)) # new
        grad[,j]<- wn_grad%*%(sig-rep(var,q))/(sum(wn))+grad[,j] # new
      }
    }
    ret<-list(var,sig,grad)
    names(ret)<-c('var','sig','grad')
  } else {
    ret<-list(var,sig)
    names(ret)<-c('var','sig')
  }
  return(ret)
}
####
# Optimize the 'partial' weighted CVE loss over the Stiefel manifold.
# k0 random starts; Cayley-type curvilinear updates with the step size halved whenever
# the relative increase of the loss exceeds sclack_para; a start terminates after maxit
# accepted steps or once the projection distance between iterates drops below tol.
#   dat ... data matrix with the response in column 1 and the predictors in the rest
stiefl_weight_partial_opt<-function(dat,h=NULL,k,k0=30,p=1,maxit=50,nObs=sqrt(length(dat[,1])),lambda_0=1,tol=10^(-3),sclack_para=0){
  Y<-dat[,1]
  X<-dat[,-1]
  N<-length(Y)
  dim<-length(X[1,])
  if(p<1){
    S<-est_varmat(X)
    tmp1<-q_ind(X,S,p)
    q<-tmp1$q
    ind<-tmp1$ind
  } else {
    q<-N
    ind<-1:N
  }
  Xl<-(kronecker(rep(1,q),X)-kronecker(X[ind,],rep(1,N)))
  dtemp<-apply(Xl,1,norm2)
  if(is.null(h)){
    S<-est_varmat(X)
    tr<-var_tr(S)
    h<-choose_h_2(dim,k,N,nObs,tr)
  }
  best<-exp(10000) # overflows to Inf, i.e. the initial 'worst' loss
  Vend<-mat.or.vec(dim,k)
  sig<-mat.or.vec(q,1)
  for(u in 1:k0){
    Vnew<-Vold<-stiefl_startval(dim,k)
    # print(Vold)
    # print(LV(Vold,Xl,dtemp,h,q,Y)$var)
    Lnew<-Lold<-exp(10000)
    lambda<-lambda_0
    err<-10
    count<-0
    count2<-0
    while(err>tol&count<maxit){
      # print(Vold)
      tmp2<-LV_weight_partial(Vold,Xl,dtemp,h,q,Y)
      G<-tmp2$grad
      Lold<-tmp2$var
      W<-G%*%t(Vold)-Vold%*%t(G)
      stepsize<-lambda # /(2*sqrt(count+1))
      Vnew<-solve(diag(1,dim)+stepsize*W)%*%(diag(1,dim)-stepsize*W)%*%Vold
      # print(Vnew)
      tmp3<-LV_weight_partial(Vnew,Xl,dtemp,h,q,Y,grad=F)
      Lnew<-tmp3$var
      err<-sqrt(sum((Vold%*%t(Vold)-Vnew%*%t(Vnew))^2))/sqrt(2*k) # sqrt(sum(tmp3$grad^2))/(dim*k)
      # print(err)
      if(((Lnew-Lold)/Lold) > sclack_para){ # /(count+1)^(0.5)
        lambda<-lambda/2
        err<-10
        count2<-count2+1
        count<-count-1
        Vnew<-Vold #!!!!!
      }
      Vold<-Vnew
      count<-count+1
      # print(count)
    }
    if(best>Lnew){
      best<-Lnew
      Vend<-Vnew
      sig<-tmp3$sig
    }
  }
  ret<-list(Vend,best,sig,count,h,count2)
  names(ret)<-c('est_base','var','aov_dat','count','h','count2')
  return(ret)
}
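A minimal usage sketch for stiefl_weight_partial_opt, assuming the helpers it calls (est_varmat, var_tr, choose_h_2, q_ind, stiefl_startval, norm2, column_normalize and LV_weight_partial above) have been sourced from these scripts. The simulated model, sample size and tuning values below are illustrative only, and whether est_base spans the estimated reduction or its orthogonal complement follows the convention of the surrounding simulation code:

# Hedged example; requires the helper functions from these scripts.
set.seed(1)
N <- 100; p_dim <- 6; truedim <- 1
b <- c(1, rep(0, p_dim - 1))            # true direction
X <- matrix(rnorm(N * p_dim), N, p_dim)
Y <- (X %*% b)^2 + 0.25 * rnorm(N)      # simple single-index model
dat <- cbind(Y, X)                      # response in column 1, predictors after

fit <- stiefl_weight_partial_opt(dat, k = truedim, k0 = 5, maxit = 20)
fit$est_base   # dim x k matrix with orthonormal columns
fit$var        # attained weighted conditional variance
fit$h          # bandwidth chosen by choose_h_2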
###########
# Optimize the 'full' weighted CVE loss over the Stiefel manifold; identical to
# stiefl_weight_partial_opt except that the objective and gradient come from
# LV_weight_full.
stiefl_weight_full_opt<-function(dat,h=NULL,k,k0=30,p=1,maxit=50,nObs=sqrt(length(dat[,1])),lambda_0=1,tol=10^(-3),sclack_para=0){
  Y<-dat[,1]
  X<-dat[,-1]
  N<-length(Y)
  dim<-length(X[1,])
  if(p<1){
    S<-est_varmat(X)
    tmp1<-q_ind(X,S,p)
    q<-tmp1$q
    ind<-tmp1$ind
  } else {
    q<-N
    ind<-1:N
  }
  Xl<-(kronecker(rep(1,q),X)-kronecker(X[ind,],rep(1,N)))
  dtemp<-apply(Xl,1,norm2)
  if(is.null(h)){
    S<-est_varmat(X)
    tr<-var_tr(S)
    h<-choose_h_2(dim,k,N,nObs,tr)
  }
  best<-exp(10000) # overflows to Inf, i.e. the initial 'worst' loss
  Vend<-mat.or.vec(dim,k)
  sig<-mat.or.vec(q,1)
  for(u in 1:k0){
    Vnew<-Vold<-stiefl_startval(dim,k)
    # print(Vold)
    # print(LV(Vold,Xl,dtemp,h,q,Y)$var)
    Lnew<-Lold<-exp(10000)
    lambda<-lambda_0
    err<-10
    count<-0
    count2<-0
    while(err>tol&count<maxit){
      # print(Vold)
      tmp2<-LV_weight_full(Vold,Xl,dtemp,h,q,Y)
      G<-tmp2$grad
      Lold<-tmp2$var
      W<-G%*%t(Vold)-Vold%*%t(G)
      stepsize<-lambda # /(2*sqrt(count+1))
      Vnew<-solve(diag(1,dim)+stepsize*W)%*%(diag(1,dim)-stepsize*W)%*%Vold
      # print(Vnew)
      tmp3<-LV_weight_full(Vnew,Xl,dtemp,h,q,Y,grad=F)
      Lnew<-tmp3$var
      err<-sqrt(sum((Vold%*%t(Vold)-Vnew%*%t(Vnew))^2))/sqrt(2*k) # sqrt(sum(tmp3$grad^2))/(dim*k)
      # print(err)
      if(((Lnew-Lold)/Lold) > sclack_para){ # /(count+1)^(0.5)
        lambda<-lambda/2
        err<-10
        count2<-count2+1
        count<-count-1
        Vnew<-Vold #!!!!!
      }
      Vold<-Vnew
      count<-count+1
      # print(count)
    }
    if(best>Lnew){
      best<-Lnew
      Vend<-Vnew
      sig<-tmp3$sig
    }
  }
  ret<-list(Vend,best,sig,count,h,count2)
  names(ret)<-c('est_base','var','aov_dat','count','h','count2')
  return(ret)
}
@@ -0,0 +1,46 @@
\documentclass[12pt,a4paper]{article}

\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{amsmath, amsfonts, amssymb, amsthm}
\usepackage{fullpage}

\newcommand{\vecl}{\ensuremath{\operatorname{vec}_l}}
\newcommand{\Sym}{\ensuremath{\operatorname{Sym}}}

\begin{document}

We index a given matrix $A = (a_{i,j})_{i,j = 0, \ldots, n-1} \in \mathbb{R}^{n\times n}$ as
\begin{displaymath}
    A = \begin{pmatrix}
        a_{0,0}   & a_{0,1}   & a_{0,2}   & \ldots & a_{0,n-1} \\
        a_{1,0}   & a_{1,1}   & a_{1,2}   & \ldots & a_{1,n-1} \\
        a_{2,0}   & a_{2,1}   & a_{2,2}   & \ldots & a_{2,n-1} \\
        \vdots    & \vdots    & \vdots    & \ddots & \vdots \\
        a_{n-1,0} & a_{n-1,1} & a_{n-1,2} & \ldots & a_{n-1,n-1}
    \end{pmatrix}.
\end{displaymath}

A symmetric matrix with zero main diagonal, i.e.\ a matrix $S = S^T$ with $s_{i,i} = 0$ for all $i = 0, \ldots, n-1$, has the form
\begin{displaymath}
    S = \begin{pmatrix}
        0         & s_{1,0}   & s_{2,0}   & \ldots & s_{n-1,0} \\
        s_{1,0}   & 0         & s_{2,1}   & \ldots & s_{n-1,1} \\
        s_{2,0}   & s_{2,1}   & 0         & \ldots & s_{n-1,2} \\
        \vdots    & \vdots    & \vdots    & \ddots & \vdots \\
        s_{n-1,0} & s_{n-1,1} & s_{n-1,2} & \ldots & 0
    \end{pmatrix}.
\end{displaymath}
It is therefore sufficient to store only its lower triangular part. For memory efficiency, and to allow some further algorithmic shortcuts (which are sometimes more expensive), the symmetric matrix $S$ is stored in packed form, i.e.\ in a vector of length $\frac{n(n-1)}{2}$. As for matrices, the elements are kept in column-major order, and the operator $\vecl:\Sym(n)\to \mathbb{R}^{n(n-1)/2}$ is defined as

\begin{displaymath}
    \vecl(S) = (s_{1,0}, s_{2,0}, \cdots, s_{n-1,0}, s_{2,1}, \cdots, s_{n-1,n-2})^T.
\end{displaymath}

The relation between the matrix indices $i,j$ and the $\vecl$ index $k$ is given by

\begin{displaymath}
    \vecl(S)_k = s_{i,j} \quad\Leftrightarrow\quad k = jn + i, \qquad j \in \{0,\ldots,n-2\},\ j < i < n.
\end{displaymath}

\end{document}
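A quick cross-check in R of the storage scheme described above (a hedged sketch, not part of the package): S[lower.tri(S)] extracts exactly \vecl(S) in column-major order, and reading k = jn + i as the zero-based position of s_{i,j} in the full column-major vectorization vec(S) lets each packed entry be looked up in the full matrix.

# Hedged R sketch of the vec_l storage scheme.
n <- 5
S <- matrix(0, n, n)
S[lower.tri(S)] <- seq_len(n * (n - 1) / 2)  # fill the strictly lower triangle
S <- S + t(S)                                # symmetric with zero main diagonal

vecl <- function(S) S[lower.tri(S)]          # column-major strictly lower triangle

# Zero-based (i, j) indices of the packed entries, in vec_l order (j < i).
ij <- which(lower.tri(S), arr.ind = TRUE) - 1L
i <- ij[, "row"]; j <- ij[, "col"]
k <- j * n + i                               # position of s_{i,j} in vec(S), zero-based

stopifnot(length(vecl(S)) == n * (n - 1) / 2,
          all(vecl(S) == as.vector(S)[k + 1L]))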