cleanup

2019-09-25 14:49:12 +02:00 · 2019-09-25 14:49:12 +02:00 · 50302ce18a
commit 50302ce18a
parent 8db5266f8e
25 changed files with 1000 additions and 409 deletions
--- a/CVE_C/NAMESPACE
+++ b/CVE_C/NAMESPACE
@ -5,15 +5,15 @@ S3method(summary,cve)
 export(cve)
 export(cve.call)
 export(cve.grid.search)
-export(cve_linesearch)
-export(cve_sgd)
-export(cve_simple)
 export(dataset)
 export(elem.pairs)
 export(estimate.bandwidth)
-export(grad)
 export(null)
+export(projTangentStiefl)
 export(rStiefl)
+export(retractStiefl)
+export(skew)
+export(sym)
 import(stats)
 importFrom(graphics,lines)
 importFrom(graphics,plot)
--- a/CVE_C/R/cve_linesearch.R
+++ b/CVE_C/R/cve_linesearch.R
@ -1,169 +0,0 @@
-#' Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe
-#' conditions.
-#'
-#' @keywords internal
-#' @export
-cve_linesearch <- function(X, Y, k,
-                           nObs = sqrt(nrow(X)),
-                           h = NULL,
-                           tau = 1.0,
-                           tol = 1e-3,
-                           rho1 = 0.1,
-                           rho2 = 0.9,
-                           slack = 0,
-                           epochs = 50L,
-                           attempts = 10L,
-                           max.linesearch.iter = 10L,
-                           logger = NULL
-) {
-    # Set `grad` functions environment to enable if to find this environments
-    # local variabels, needed to enable the manipulation of this local variables
-    # from within `grad`.
-    environment(grad) <- environment()
-
-    # Get dimensions.
-    n <- nrow(X)
-    p <- ncol(X)
-    q <- p - k
-
-    # Save initial learning rate `tau`.
-    tau.init <- tau
-    # Addapt tolearance for break condition.
-    tol <- sqrt(2 * q) * tol
-
-    # Estaimate bandwidth if not given.
-    if (missing(h) | !is.numeric(h)) {
-        h <- estimate.bandwidth(X, k, nObs)
-    }
-
-    # Compute persistent data.
-    # Compute lookup indexes for symmetrie, lower/upper
-    # triangular parts and vectorization.
-    pair.index <- elem.pairs(seq(n))
-    i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
-    j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
-    # Matrix of vectorized indices. (vec(index) -> seq)
-    index <- matrix(seq(n * n), n, n)
-    lower <- index[lower.tri(index)]
-    upper <- t(index)[lower]
-
-    # Create all pairewise differences of rows of `X`.
-    X_diff <- X[i, , drop = F] - X[j, , drop = F]
-    # Identity matrix.
-    I_p <- diag(1, p)
-
-    # Init tracking of current best (according multiple attempts).
-    V.best <- NULL
-    loss.best <- Inf
-
-    # Start loop for multiple attempts.
-    for (attempt in 1:attempts) {
-
-        # Sample a `(p, q)` dimensional matrix from the stiefel manifold as
-        # optimization start value.
-        V <- rStiefl(p, q)
-
-        # Initial loss and gradient.
-        loss <- Inf
-        G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
-        # Set last loss (aka, loss after applying the step).
-        loss.last <- loss
-
-        # Call logger with initial values before starting optimization.
-        if (is.function(logger)) {
-            epoch <- 0 # Set epoch count to 0 (only relevant for logging).
-            error <- NA
-            logger(environment())
-        }
-
-        ## Start optimization loop.
-        for (epoch in 1:epochs) {
-
-            # Cayley transform matrix `A`
-            A <- (G %*% t(V)) - (V %*% t(G))
-
-            # Directional derivative of the loss at current position, given
-            # as `Tr(G^T \cdot A \cdot V)`.
-            loss.prime <- -0.5 * norm(A, type = 'F')^2
-
-            # Linesearch
-            tau.upper <- Inf
-            tau.lower <- 0
-            tau <- tau.init
-            for (iter in 1:max.linesearch.iter) {
-                # Apply learning rate `tau`.
-                A.tau <- (tau / 2) * A
-                # Parallet transport (on Stiefl manifold) into direction of `G`.
-                inv <- solve(I_p + A.tau)
-                V.tau <- inv %*% ((I_p - A.tau) %*% V)
-
-                # Loss at position after a step.
-                loss <- Inf # aka loss.tau
-                G.tau <- grad(X, Y, V.tau, h, loss.out = TRUE, persistent = TRUE)
-
-                # Armijo condition.
-                if (loss > loss.last + (rho1 * tau * loss.prime)) {
-                    tau.upper <- tau
-                    tau <- (tau.lower + tau.upper) / 2
-                    next()
-                }
-
-                V.prime.tau <- -0.5 * inv %*% A %*% (V + V.tau)
-                loss.prime.tau <- sum(G * V.prime.tau) # Tr(grad(tau)^T \cdot Y^'(tau))
-
-                # Wolfe condition.
-                if (loss.prime.tau < rho2 * loss.prime) {
-                    tau.lower <- tau
-                    if (tau.upper == Inf) {
-                        tau <- 2 * tau.lower
-                    } else {
-                        tau <- (tau.lower + tau.upper) / 2
-                    }
-                } else {
-                    break()
-                }
-            }
-
-            # Compute error.
-            error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
-
-            # Check break condition (epoch check to skip ignored gradient calc).
-            # Note: the devision by `sqrt(2 * k)` is included in `tol`.
-            if (error < tol | epoch >= epochs) {
-                # take last step and stop optimization.
-                V <- V.tau
-                # Final call to the logger before stopping optimization
-                if (is.function(logger)) {
-                    G <- G.tau
-                    logger(environment())
-                }
-                break()
-            }
-
-            # Perform the step and remember previous loss.
-            V <- V.tau
-            loss.last <- loss
-            G <- G.tau
-
-            # Log after taking current step.
-            if (is.function(logger)) {
-                logger(environment())
-            }
-
-        }
-
-        # Check if current attempt improved previous ones
-        if (loss < loss.best) {
-            loss.best <- loss
-            V.best <- V
-        }
-
-    }
-
-    return(list(
-        loss = loss.best,
-        V = V.best,
-        B = null(V.best),
-        h = h
-    ))
-}
--- a/CVE_C/R/gradient.R
+++ b/CVE_C/R/gradient.R
@ -1,48 +0,0 @@
-#' Compute get gradient of `L(V)` given a dataset `X`.
-#'
-#' @param X Data matrix.
-#' @param Y Responce.
-#' @param V Position to compute the gradient at, aka point on Stiefl manifold.
-#' @param h Bandwidth
-#' @param loss.out Iff \code{TRUE} loss will be written to parent environment.
-#' @param loss.only Boolean to only compute the loss, of \code{TRUE} a single
-#'  value loss is returned and \code{envir} is ignored.
-#' @param persistent Determines if data indices and dependent calculations shall
-#'  be reused from the parent environment. ATTENTION: Do NOT set this flag, only
-#'  intended for internal usage by carefully aligned functions!
-#' @keywords internal
-#' @export
-grad <- function(X, Y, V, h,
-                 loss.out = FALSE,
-                 loss.only = FALSE,
-                 persistent = FALSE) {
-    # Get number of samples and dimension.
-    n <- nrow(X)
-    p <- ncol(X)
-
-    if (!persistent) {
-        # Compute lookup indexes for symmetrie, lower/upper
-        # triangular parts and vectorization.
-        pair.index <- elem.pairs(seq(n))
-        i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
-        j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
-        # Index of vectorized matrix, for lower and upper triangular part.
-        lower <- ((i - 1) * n) + j
-        upper <- ((j - 1) * n) + i
-
-        # Create all pairewise differences of rows of `X`.
-        X_diff <- X[i, , drop = F] - X[j, , drop = F]
-    }
-
-    out <- .Call("grad_c", PACKAGE = "CVE",
-                 X, X_diff, as.double(Y), V, as.double(h));
-
-    if (loss.only) {
-        return(out$loss)
-    }
-    if (loss.out) {
-        loss <<- out$loss
-    }
-
-    return(out$G)
-}
--- a/CVE_C/R/util.R
+++ b/CVE_C/R/util.R
@ -1,4 +1,4 @@
-#' Samples uniform from the Stiefel Manifold
+#' Samples uniform from the Stiefl Manifold.
 #'
 #' @param p row dim.
 #' @param q col dim.
@ -10,6 +10,48 @@ rStiefl <- function(p, q) {
    return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))))
 }

+#' Retraction to the manifold.
+#'
+#' @param A matrix.
+#' @return `(p, q)` semi-orthogonal matrix, aka element of the Stiefl manifold.
+#' @keywords internal
+#' @export
+retractStiefl <- function(A) {
+    return(qr.Q(qr(A)))
+}
+
+#' Skew-Symmetric matrix computed from `A` as
+#' \eqn{1/2 (A - A^T)}.
+#' @param A Matrix of dim `(p, q)`
+#' @return Skew-Symmetric matrix of dim `(p, p)`.
+#' @keywords internal
+#' @export
+skew <- function(A) {
+    0.5 * (A - t(A))
+}
+
+#' Symmetric matrix computed from `A` as
+#' \eqn{1/2 (A + A^T)}.
+#' @param A Matrix of dim `(p, q)`
+#' @return Symmetric matrix of dim `(p, p)`.
+#' @keywords internal
+#' @export
+sym <- function(A) {
+    0.5 * (A + t(A))
+}
+
+#' Orthogonal Projection onto the tangent space of the stiefl manifold.
+#'
+#' @param V Point on the stiefl manifold.
+#' @param G matrix to be projected onto the tangent space at `V`.
+#' @return `(p, q)` matrix as element of the tangent space at `V`.
+#' @keywords internal
+#' @export
+projTangentStiefl <- function(V, G) {
+    Q <- diag(1, nrow(V)) - V %*% t(V)
+    return(Q %*% G + V %*% skew(t(V) %*% G))
+}
+
 #' Null space basis of given matrix `V`
 #'
 #' @param V `(p, q)` matrix
@ -19,7 +61,7 @@ rStiefl <- function(p, q) {
 null <- function(V) {
    tmp <- qr(V)
    set <- if(tmp$rank == 0L) seq_len(ncol(V)) else -seq_len(tmp$rank)
-    return(qr.Q(tmp, complete=TRUE)[, set, drop=FALSE])
+    return(qr.Q(tmp, complete = TRUE)[, set, drop = FALSE])
 }

 #' Creates a (numeric) matrix where each column contains
--- a/CVE_C/man/cve.Rd
+++ b/CVE_C/man/cve.Rd
@ -5,10 +5,11 @@
 \alias{cve.call}
 \title{Implementation of the CVE method.}
 \usage{
-cve(formula, data, method = "simple", max.dim = 10, ...)
+cve(formula, data, method = "simple", max.dim = 10L, ...)

-cve.call(X, Y, method = "simple", nObs = nrow(X)^0.5, min.dim = 1,
-  max.dim = 10, k, ...)
+cve.call(X, Y, method = "simple", nObs = sqrt(nrow(X)), h = NULL,
+  min.dim = 1L, max.dim = 10L, k = NULL, tau = 1, tol = 0.001,
+  epochs = 50L, attempts = 10L, logger = NULL)
 }
 \arguments{
 \item{formula}{Formel for the regression model defining `X`, `Y`.
--- a/CVE_C/man/cve_linesearch.Rd
+++ b/CVE_C/man/cve_linesearch.Rd
@ -1,16 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/cve_linesearch.R
-\name{cve_linesearch}
-\alias{cve_linesearch}
-\title{Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe
-conditions.}
-\usage{
-cve_linesearch(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 1,
-  tol = 0.001, rho1 = 0.1, rho2 = 0.9, slack = 0, epochs = 50L,
-  attempts = 10L, max.linesearch.iter = 10L, logger = NULL)
-}
-\description{
-Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe
-conditions.
-}
-\keyword{internal}
--- a/CVE_C/man/cve_sgd.Rd
+++ b/CVE_C/man/cve_sgd.Rd
@ -1,16 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/cve_sgd.R
-\name{cve_sgd}
-\alias{cve_sgd}
-\title{Simple implementation of the CVE method. 'Simple' means that this method is
-a classic GD method unsing no further tricks.}
-\usage{
-cve_sgd(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 0.01,
-  tol = 0.001, epochs = 50L, batch.size = 16L, attempts = 10L,
-  logger = NULL)
-}
-\description{
-Simple implementation of the CVE method. 'Simple' means that this method is
-a classic GD method unsing no further tricks.
-}
-\keyword{internal}
--- a/CVE_C/man/cve_simple.Rd
+++ b/CVE_C/man/cve_simple.Rd
@ -1,16 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/cve_simple.R
-\name{cve_simple}
-\alias{cve_simple}
-\title{Simple implementation of the CVE method. 'Simple' means that this method is
-a classic GD method unsing no further tricks.}
-\usage{
-cve_simple(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 1,
-  tol = 0.001, slack = 0, epochs = 50L, attempts = 10L,
-  logger = NULL)
-}
-\description{
-Simple implementation of the CVE method. 'Simple' means that this method is
-a classic GD method unsing no further tricks.
-}
-\keyword{internal}
--- a/CVE_C/man/grad.Rd
+++ b/CVE_C/man/grad.Rd
@ -1,31 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/gradient.R
-\name{grad}
-\alias{grad}
-\title{Compute get gradient of `L(V)` given a dataset `X`.}
-\usage{
-grad(X, Y, V, h, loss.out = FALSE, loss.only = FALSE,
-  persistent = FALSE)
-}
-\arguments{
-\item{X}{Data matrix.}
-
-\item{Y}{Responce.}
-
-\item{V}{Position to compute the gradient at, aka point on Stiefl manifold.}
-
-\item{h}{Bandwidth}
-
-\item{loss.out}{Iff \code{TRUE} loss will be written to parent environment.}
-
-\item{loss.only}{Boolean to only compute the loss, of \code{TRUE} a single
-value loss is returned and \code{envir} is ignored.}
-
-\item{persistent}{Determines if data indices and dependent calculations shall
-be reused from the parent environment. ATTENTION: Do NOT set this flag, only
-intended for internal usage by carefully aligned functions!}
-}
-\description{
-Compute get gradient of `L(V)` given a dataset `X`.
-}
-\keyword{internal}
--- a/CVE_C/man/projTangentStiefl.Rd
+++ b/CVE_C/man/projTangentStiefl.Rd
@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/util.R
+\name{projTangentStiefl}
+\alias{projTangentStiefl}
+\title{Orthogonal Projection onto the tangent space of the stiefl manifold.}
+\usage{
+projTangentStiefl(V, G)
+}
+\arguments{
+\item{V}{Point on the stiefl manifold.}
+
+\item{G}{matrix to be projected onto the tangent space at `V`.}
+}
+\value{
+`(p, q)` matrix as element of the tangent space at `V`.
+}
+\description{
+Orthogonal Projection onto the tangent space of the stiefl manifold.
+}
+\keyword{internal}
--- a/CVE_C/man/rStiefl.Rd
+++ b/CVE_C/man/rStiefl.Rd
@ -2,7 +2,7 @@
 % Please edit documentation in R/util.R
 \name{rStiefl}
 \alias{rStiefl}
-\title{Samples uniform from the Stiefel Manifold}
+\title{Samples uniform from the Stiefl Manifold.}
 \usage{
 rStiefl(p, q)
 }
@ -15,7 +15,7 @@ rStiefl(p, q)
 `(p, q)` semi-orthogonal matrix
 }
 \description{
-Samples uniform from the Stiefel Manifold
+Samples uniform from the Stiefl Manifold.
 }
 \examples{
 V <- rStiefel(6, 4)
--- a/CVE_C/man/retractStiefl.Rd
+++ b/CVE_C/man/retractStiefl.Rd
@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/util.R
+\name{retractStiefl}
+\alias{retractStiefl}
+\title{Retraction to the manifold.}
+\usage{
+retractStiefl(A)
+}
+\arguments{
+\item{A}{matrix.}
+}
+\value{
+`(p, q)` semi-orthogonal matrix, aka element of the Stiefl manifold.
+}
+\description{
+Retraction to the manifold.
+}
+\keyword{internal}
--- a/CVE_C/man/skew.Rd
+++ b/CVE_C/man/skew.Rd
@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/util.R
+\name{skew}
+\alias{skew}
+\title{Skew-Symmetric matrix computed from `A` as
+\eqn{1/2 (A - A^T)}.}
+\usage{
+skew(A)
+}
+\arguments{
+\item{A}{Matrix of dim `(p, q)`}
+}
+\value{
+Skew-Symmetric matrix of dim `(p, p)`.
+}
+\description{
+Skew-Symmetric matrix computed from `A` as
+\eqn{1/2 (A - A^T)}.
+}
+\keyword{internal}
--- a/CVE_C/man/sym.Rd
+++ b/CVE_C/man/sym.Rd
@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/util.R
+\name{sym}
+\alias{sym}
+\title{Symmetric matrix computed from `A` as
+\eqn{1/2 (A + A^T)}.}
+\usage{
+sym(A)
+}
+\arguments{
+\item{A}{Matrix of dim `(p, q)`}
+}
+\value{
+Symmetric matrix of dim `(p, p)`.
+}
+\description{
+Symmetric matrix computed from `A` as
+\eqn{1/2 (A + A^T)}.
+}
+\keyword{internal}
--- a/CVE_C/src/cve_simple.c
+++ b/CVE_C/src/cve_simple.c
@ -12,7 +12,7 @@ void cve_simple_sub(const int n, const int p, const int q,
                    double *V, double *L,
                    SEXP logger, SEXP loggerEnv) {

-    int attempt, epoch, i, j, k, nn = (n * (n - 1)) / 2;
+    int attempt, epoch, i, nn = (n * (n - 1)) / 2;
    double loss, loss_last, loss_best, err, tau;
    double tol = tol_init * sqrt((double)(2 * q));
    double gKscale = -0.5 / h;
--- a/CVE_R/R/CVE.R
+++ b/CVE_R/R/CVE.R
@ -10,6 +10,7 @@
 #'
 #' @docType package
 #' @author Loki
+#' @useDynLib CVE, .registration = TRUE
 "_PACKAGE"

 #' Implementation of the CVE method.
@ -54,21 +55,21 @@
 #' @import stats
 #' @importFrom stats model.frame
 #' @export
-cve <- function(formula, data, method = "simple", max.dim = 10, ...) {
+cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
    # check for type of `data` if supplied and set default
    if (missing(data)) {
        data <- environment(formula)
    } else if (!is.data.frame(data)) {
-        stop('Parameter `data` must be a `data.frame` or missing.')
+        stop("Parameter 'data' must be a 'data.frame' or missing.")
    }

    # extract `X`, `Y` from `formula` with `data`
    model <- stats::model.frame(formula, data)
-    X <- as.matrix(model[,-1, drop = FALSE])
-    Y <- as.matrix(model[, 1, drop = FALSE])
+    X <- as.matrix(model[ ,-1L, drop = FALSE])
+    Y <- as.double(model[ , 1L])

    # pass extracted data on to [cve.call()]
-    dr <- cve.call(X, Y, method = method, ...)
+    dr <- cve.call(X, Y, method = method, max.dim = max.dim, ...)

    # overwrite `call` property from [cve.call()]
    dr$call <- match.call()
@ -83,35 +84,81 @@ cve <- function(formula, data, method = "simple", max.dim = 10, ...) {
 #' @param ... Method specific parameters.
 #' @rdname cve
 #' @export
-cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5,
-                     min.dim = 1, max.dim = 10, k, ...) {
+cve.call <- function(X, Y, method = "simple",
+                     nObs = sqrt(nrow(X)), h = NULL,
+                     min.dim = 1L, max.dim = 10L, k = NULL,
+                     tau = 1.0, tol = 1e-3,
+                     epochs = 50L, attempts = 10L,
+                     logger = NULL) {

    # parameter checking
-    if (!is.matrix(X)) {
-        stop('X should be a matrices.')
+    if (!(is.matrix(X) && is.numeric(X))) {
+        stop("Parameter 'X' should be a numeric matrices.")
    }
-    if (is.matrix(Y)) {
-        Y <- as.vector(Y)
+    if (!is.numeric(Y)) {
+        stop("Parameter 'Y' must be numeric.")
+    }
+    if (is.matrix(Y) || !is.double(Y)) {
+        Y <- as.double(Y)
    }
    if (nrow(X) != length(Y)) {
-        stop('Rows of X and number of Y elements are not compatible.')
+        stop("Rows of 'X' and 'Y' elements are not compatible.")
    }
    if (ncol(X) < 2) {
-        stop('X is one dimensional, no need for dimension reduction.')
+        stop("'X' is one dimensional, no need for dimension reduction.")
    }

-    if (!missing(k)) {
-        min.dim <- as.integer(k)
-        max.dim <- as.integer(k)
-    } else {
+    if (missing(k) || is.null(k)) {
        min.dim <- as.integer(min.dim)
        max.dim <- as.integer(min(max.dim, ncol(X) - 1L))
+    } else {
+        min.dim <- as.integer(k)
+        max.dim <- as.integer(k)
    }
    if (min.dim > max.dim) {
-        stop('`min.dim` bigger `max.dim`.')
+        stop("'min.dim' bigger 'max.dim'.")
    }
    if (max.dim >= ncol(X)) {
-        stop('`max.dim` must be smaller than `ncol(X)`.')
+        stop("'max.dim' (or 'k') must be smaller than 'ncol(X)'.")
+    }
+
+    if (is.function(h)) {
+        estimate.bandwidth <- h
+        h <- NULL
+    }
+
+    if (!is.numeric(tau) || length(tau) > 1L || tau <= 0.0) {
+        stop("Initial step-width 'tau' must be positive number.")
+    } else {
+        tau <- as.double(tau)
+    }
+    if (!is.numeric(tol) || length(tol) > 1L || tol < 0.0) {
+        stop("Break condition tolerance 'tol' must be not negative number.")
+    } else {
+        tol <- as.double(tol)
+    }
+
+    if (!is.numeric(epochs) || length(epochs) > 1L) {
+        stop("Parameter 'epochs' must be positive integer.")
+    } else if (!is.integer(epochs)) {
+        epochs <- as.integer(epochs)
+    }
+    if (epochs < 1L) {
+        stop("Parameter 'epochs' must be at least 1L.")
+    }
+    if (!is.numeric(attempts) || length(attempts) > 1L) {
+        stop("Parameter 'attempts' must be positive integer.")
+    } else if (!is.integer(attempts)) {
+        attempts <- as.integer(attempts)
+    }
+    if (attempts < 1L) {
+        stop("Parameter 'attempts' must be at least 1L.")
+    }
+
+    if (is.function(logger)) {
+        loggerEnv <- environment(logger)
+    } else {
+        loggerEnv <- NULL
    }

    # Call specified method.
@ -119,15 +166,40 @@ cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5,
    call <- match.call()
    dr <- list()
    for (k in min.dim:max.dim) {
+
+        if (missing(h) || is.null(h)) {
+            h <- estimate.bandwidth(X, k, nObs)
+        } else if (is.numeric(h) && h > 0.0) {
+            h <- as.double(h)
+        } else {
+            stop("Bandwidth 'h' must be positive numeric.")
+        }
+
        if (method == 'simple') {
-            dr.k <- cve_simple(X, Y, k, nObs = nObs, ...)
-        } else if (method == 'linesearch') {
-            dr.k <- cve_linesearch(X, Y, k, nObs = nObs, ...)
-        } else if (method == 'sgd') {
-            dr.k <- cve_sgd(X, Y, k, nObs = nObs, ...)
+            dr.k <- .Call('cve_simple', PACKAGE = 'CVE',
+                          X, Y, k, h,
+                          tau, tol,
+                          epochs, attempts,
+                          logger, loggerEnv)
+        #     dr.k <- cve_simple(X, Y, k, nObs = nObs, ...)
+        # } else if (method == 'linesearch') {
+        #     dr.k <- cve_linesearch(X, Y, k, nObs = nObs, ...)
+        # } else if (method == 'rcg') {
+        #     dr.k <- cve_rcg(X, Y, k, nObs = nObs, ...)
+        # } else if (method == 'momentum') {
+        #     dr.k <- cve_momentum(X, Y, k, nObs = nObs, ...)
+        # } else if (method == 'rmsprob') {
+        #     dr.k <- cve_rmsprob(X, Y, k, nObs = nObs, ...)
+        # } else if (method == 'sgdrmsprob') {
+        #     dr.k <- cve_sgdrmsprob(X, Y, k, nObs = nObs, ...)
+        # } else if (method == 'sgd') {
+        #     dr.k <- cve_sgd(X, Y, k, nObs = nObs, ...)
        } else {
            stop('Got unknown method.')
        }
+        dr.k$B <- null(dr.k$V)
+        dr.k$loss <- mean(dr.k$L)
+        dr.k$h <- h
        dr.k$k <- k
        class(dr.k) <- "cve.k"
        dr[[k]] <- dr.k
@ -140,11 +212,6 @@ cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5,
    return(dr)
 }

-# TODO: write summary
-# summary.cve <- function() {
-#     # code #
-# }
-
 #' Ploting helper for objects of class \code{cve}.
 #'
 #' @param x Object of class \code{cve} (result of [cve()]).
@ -164,43 +231,19 @@ cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5,
 #' @method plot cve
 #' @export
 plot.cve <- function(x, ...) {
-
-
-    # H <- x$history
-    # H_1 <- H[!is.na(H[, 1]), 1]
-
-    # defaults <- list(
-    #     main = "History",
-    #     xlab = "Iterations i",
-    #     ylab = expression(loss == L[n](V^{(i)})),
-    #     xlim = c(1, nrow(H)),
-    #     ylim = c(0, max(H)),
-    #     type = "l"
-    # )
-
-    # call.plot <- match.call()
-    # keys <- names(defaults)
-    # keys <- keys[match(keys, names(call.plot)[-1], nomatch = 0) == 0]
-
-    # for (key in keys) {
-    #     call.plot[[key]] <- defaults[[key]]
-    # }
-
-    # call.plot[[1L]] <- quote(plot)
-    # call.plot$x <- quote(1:length(H_1))
-    # call.plot$y <- quote(H_1)
-
-    # eval(call.plot)
-
-    # if (ncol(H) > 1) {
-    #     for (i in 2:ncol(H)) {
-    #         H_i <- H[H[, i] > 0, i]
-    #         lines(1:length(H_i), H_i)
-    #     }
-    # }
-    # x.ends <- apply(H, 2, function(h) { length(h[!is.na(h)]) })
-    # y.ends <- apply(H, 2, function(h) { tail(h[!is.na(h)], n=1) })
-    # points(x.ends, y.ends)
+    L <- c()
+    k <- c()
+    for (dr.k in x) {
+        if (class(dr.k) == 'cve.k') {
+            k <- c(k, paste0(dr.k$k))
+            L <- c(L, dr.k$L)
+        }
+    }
+    L <- matrix(L, ncol = length(k))
+    boxplot(L, main = "Loss ...",
+            xlab = "SDR dimension k",
+            ylab = expression(L(V, X[i])),
+            names = k)
 }

 #' Prints a summary of a \code{cve} result.
--- a/CVE_R/R/cve_momentum.R
+++ b/CVE_R/R/cve_momentum.R
@ -1,17 +1,19 @@
-#' Simple implementation of the CVE method. 'Simple' means that this method is
-#' a classic GD method unsing no further tricks.
+#' Implementation of the CVE method as a Riemann Conjugated Gradient method.
 #'
+#' @references A Riemannian Conjugate Gradient Algorithm with Implicit Vector
+#' Transport for Optimization on the Stiefel Manifold
 #' @keywords internal
 #' @export
-cve_simple <- function(X, Y, k,
-                       nObs = sqrt(nrow(X)),
-                       h = NULL,
-                       tau = 1.0,
-                       tol = 1e-3,
-                       slack = 0,
-                       epochs = 50L,
-                       attempts = 10L,
-                       logger = NULL
+cve_momentum <- function(X, Y, k,
+                         nObs = sqrt(nrow(X)),
+                         h = NULL,
+                         tau = 1.0,
+                         tol = 1e-4,
+                         rho = 0.1, # Momentum update.
+                         slack = 0,
+                         epochs = 50L,
+                         attempts = 10L,
+                         logger = NULL
 ) {
    # Set `grad` functions environment to enable if to find this environments
    # local variabels, needed to enable the manipulation of this local variables
@ -67,9 +69,6 @@ cve_simple <- function(X, Y, k,
        # Set last loss (aka, loss after applying the step).
        loss.last <- loss

-        # Cayley transform matrix `A`
-        A <- (G %*% t(V)) - (V %*% t(G))
-
        # Call logger with initial values before starting optimization.
        if (is.function(logger)) {
            epoch <- 0 # Set epoch count to 0 (only relevant for logging).
@ -77,12 +76,15 @@ cve_simple <- function(X, Y, k,
            logger(environment())
        }

+        M <- matrix(0, p, q)
        ## Start optimization loop.
        for (epoch in 1:epochs) {
            # Apply learning rate `tau`.
-            A.tau <- tau * A
+            A <- projTangentStiefl(V, G)
+            # Momentum update.
+            M <- A + rho * projTangentStiefl(V, M)
            # Parallet transport (on Stiefl manifold) into direction of `G`.
-            V.tau <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)
+            V.tau <- retractStiefl(V - tau * M)

            # Loss at position after a step.
            loss <- grad(X, Y, V.tau, h, loss.only = TRUE, persistent = TRUE)
@ -119,9 +121,6 @@ cve_simple <- function(X, Y, k,

            # Compute gradient at new position.
            G <- grad(X, Y, V, h, persistent = TRUE)
-
-            # Cayley transform matrix `A`
-            A <- (G %*% t(V)) - (V %*% t(G))
        }

        # Check if current attempt improved previous ones
@ -129,7 +128,6 @@ cve_simple <- function(X, Y, k,
            loss.best <- loss
            V.best <- V
        }
-
    }

    return(list(
--- a/CVE_R/R/cve_rcg.R
+++ b/CVE_R/R/cve_rcg.R
@ -0,0 +1,179 @@
+#' Implementation of the CVE method as a Riemann Conjugated Gradient method.
+#'
+#' @references A Riemannian Conjugate Gradient Algorithm with Implicit Vector
+#' Transport for Optimization on the Stiefel Manifold
+#' @keywords internal
+#' @export
+cve_rcg <- function(X, Y, k,
+                         nObs = sqrt(nrow(X)),
+                         h = NULL,
+                         tau = 1.0,
+                         tol = 1e-4,
+                         rho = 1e-4, # For Armijo condition.
+                         slack = 0,
+                         epochs = 50L,
+                         attempts = 10L,
+                         max.linesearch.iter = 20L,
+                         logger = NULL
+) {
+    # Set `grad` functions environment to enable if to find this environments
+    # local variabels, needed to enable the manipulation of this local variables
+    # from within `grad`.
+    environment(grad) <- environment()
+
+    # Get dimensions.
+    n <- nrow(X) # Number of samples.
+    p <- ncol(X) # Data dimensions
+    q <- p - k   # Complement dimension of the SDR space.
+
+    # Save initial learning rate `tau`.
+    tau.init <- tau
+    # Addapt tolearance for break condition.
+    tol <- sqrt(2 * q) * tol
+
+    # Estaimate bandwidth if not given.
+    if (missing(h) || !is.numeric(h)) {
+        h <- estimate.bandwidth(X, k, nObs)
+    }
+
+    # Compute persistent data.
+    # Compute lookup indexes for symmetrie, lower/upper
+    # triangular parts and vectorization.
+    pair.index <- elem.pairs(seq(n))
+    i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
+    j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
+    # Index of vectorized matrix, for lower and upper triangular part.
+    lower <- ((i - 1) * n) + j
+    upper <- ((j - 1) * n) + i
+
+    # Create all pairewise differences of rows of `X`.
+    X_diff <- X[i, , drop = F] - X[j, , drop = F]
+    # Identity matrix.
+    I_p <- diag(1, p)
+
+    # Init tracking of current best (according multiple attempts).
+    V.best <- NULL
+    loss.best <- Inf
+
+    # Start loop for multiple attempts.
+    for (attempt in 1:attempts) {
+        # Reset learning rate `tau`.
+        tau <- tau.init
+
+        # Sample a `(p, q)` dimensional matrix from the stiefel manifold as
+        # optimization start value.
+        V <- rStiefl(p, q)
+
+        # Initial loss and gradient.
+        loss <- Inf
+        G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
+        # Set last loss (aka, loss after applying the step).
+        loss.last <- loss
+
+        # Cayley transform matrix `A`
+        A <- (G %*% t(V)) - (V %*% t(G))
+        A.last <- A
+
+        W <- -A
+        Z <- W %*% V
+
+        # Compute directional derivative.
+        loss.prime <- sum(G * Z) # Tr(G^T Z)
+
+        # Call logger with initial values before starting optimization.
+        if (is.function(logger)) {
+            epoch <- 0 # Set epoch count to 0 (only relevant for logging).
+            error <- NA
+            logger(environment())
+        }
+
+        ## Start optimization loop.
+        for (epoch in 1:epochs) {
+            # New directional derivative.
+            loss.prime <- sum(G * Z)
+
+            # Reset `tau` for step-size selection.
+            tau <- tau.init
+            for (iter in 1:max.linesearch.iter) {
+                V.tau <- retractStiefl(V + tau * Z)
+                # Loss at position after a step.
+                loss <- grad(X, Y, V.tau, h,
+                             loss.only = TRUE, persistent = TRUE)
+                # Check Armijo condition.
+                if (loss <= loss.last + (rho * tau * loss.prime)) {
+                    break() # Iff fulfilled stop linesearch.
+                }
+                # Reduce step-size and continue linesearch.
+                tau <- tau / 2
+            }
+
+            # Compute error.
+            error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
+
+            # Perform step with found step-size
+            V <- V.tau
+            loss.last <- loss
+
+            # Call logger.
+            if (is.function(logger)) {
+                logger(environment())
+            }
+
+            # Check break condition.
+            # Note: the devision by `sqrt(2 * k)` is included in `tol`.
+            if (error < tol) {
+                break()
+            }
+
+            # Compute Gradient at new position.
+            G <- grad(X, Y, V, h, persistent = TRUE)
+            # Store last `A` for `beta` computation.
+            A.last <- A
+            # Cayley transform matrix `A`
+            A <- (G %*% t(V)) - (V %*% t(G))
+
+            # Check 2. break condition.
+            if (norm(A, type = 'F') < tol) {
+                break()
+            }
+
+            # New directional derivative.
+            loss.prime <- sum(G * Z)
+
+            # Reset beta if needed.
+            if (loss.prime < 0) {
+                # Compute `beta` as describet in paper.
+                beta.FR <- (norm(A, type = 'F') / norm(A.last, type = 'F'))^2
+                beta.PR <- sum(A * (A - A.last)) / norm(A.last, type = 'F')^2
+                if (beta.PR < -beta.FR) {
+                    beta <- -beta.FR
+                } else if (abs(beta.PR) < beta.FR) {
+                    beta <- beta.PR
+                } else if (beta.PR > beta.FR) {
+                    beta <- beta.FR
+                } else {
+                    beta <- 0
+                }
+            } else {
+                beta <- 0
+            }
+
+            # Update direction.
+            W <- -A + beta * W
+            Z <- W %*% V
+        }
+
+        # Check if current attempt improved previous ones
+        if (loss < loss.best) {
+            loss.best <- loss
+            V.best <- V
+        }
+    }
+
+    return(list(
+        loss = loss.best,
+        V = V.best,
+        B = null(V.best),
+        h = h
+    ))
+}
--- a/CVE_R/R/cve_rmsprob.R
+++ b/CVE_R/R/cve_rmsprob.R
@ -0,0 +1,121 @@
+#' Implementation of the CVE method as a Riemann Conjugated Gradient method.
+#'
+#' @references A Riemannian Conjugate Gradient Algorithm with Implicit Vector
+#' Transport for Optimization on the Stiefel Manifold
+#' @keywords internal
+#' @export
+cve_rmsprob <- function(X, Y, k,
+                        nObs = sqrt(nrow(X)),
+                        h = NULL,
+                        tau = 0.1,
+                        tol = 1e-4,
+                        rho = 0.1, # Momentum update.
+                        slack = 0,
+                        epochs = 50L,
+                        attempts = 10L,
+                        epsilon = 1e-7,
+                        max.linesearch.iter = 20L,
+                        logger = NULL
+) {
+    # Set `grad` functions environment to enable if to find this environments
+    # local variabels, needed to enable the manipulation of this local variables
+    # from within `grad`.
+    environment(grad) <- environment()
+
+    # Get dimensions.
+    n <- nrow(X) # Number of samples.
+    p <- ncol(X) # Data dimensions
+    q <- p - k   # Complement dimension of the SDR space.
+
+    # Save initial learning rate `tau`.
+    tau.init <- tau
+    # Addapt tolearance for break condition.
+    tol <- sqrt(2 * q) * tol
+
+    # Estaimate bandwidth if not given.
+    if (missing(h) || !is.numeric(h)) {
+        h <- estimate.bandwidth(X, k, nObs)
+    }
+
+    # Compute persistent data.
+    # Compute lookup indexes for symmetrie, lower/upper
+    # triangular parts and vectorization.
+    pair.index <- elem.pairs(seq(n))
+    i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
+    j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
+    # Index of vectorized matrix, for lower and upper triangular part.
+    lower <- ((i - 1) * n) + j
+    upper <- ((j - 1) * n) + i
+
+    # Create all pairewise differences of rows of `X`.
+    X_diff <- X[i, , drop = F] - X[j, , drop = F]
+    # Identity matrix.
+    I_p <- diag(1, p)
+
+    # Init tracking of current best (according multiple attempts).
+    V.best <- NULL
+    loss.best <- Inf
+
+    # Start loop for multiple attempts.
+    for (attempt in 1:attempts) {
+        # Sample a `(p, q)` dimensional matrix from the stiefel manifold as
+        # optimization start value.
+        V <- rStiefl(p, q)
+
+        # Call logger with initial values before starting optimization.
+        if (is.function(logger)) {
+            loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
+            epoch <- 0 # Set epoch count to 0 (only relevant for logging).
+            error <- NA
+            logger(environment())
+        }
+
+        M <- matrix(0, p, q)
+        ## Start optimization loop.
+        for (epoch in 1:epochs) {
+            # Compute gradient and loss at current position.
+            loss <- Inf
+            G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
+            # Projectd Gradient.
+            A <- projTangentStiefl(V, G)
+            # Projected element squared gradient.
+            Asq <- projTangentStiefl(V, G * G)
+            # Momentum update.
+            M <- (1 - rho) * Asq + rho * projTangentStiefl(V, M)
+            # Parallet transport (on Stiefl manifold) into direction of `G`.
+            V.tau <- retractStiefl(V - tau.init * A / (sqrt(abs(M)) + epsilon))
+
+            # Compute error.
+            error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
+
+            # Perform step.
+            V <- V.tau
+
+            # Call logger after taking a step.
+            if (is.function(logger)) {
+                # Set tau to an step size estimate (only for logging)
+                tau <- tau.init / mean(sqrt(abs(M)) + epsilon)
+                logger(environment())
+            }
+
+            # Check break condition.
+            # Note: the devision by `sqrt(2 * k)` is included in `tol`.
+            if (error < tol) {
+                break()
+            }
+        }
+
+        # Check if current attempt improved previous ones
+        if (loss < loss.best) {
+            loss.best <- loss
+            V.best <- V
+        }
+    }
+
+    return(list(
+        loss = loss.best,
+        V = V.best,
+        B = null(V.best),
+        h = h
+    ))
+}
--- a/CVE_R/R/cve_sgdrmsprob.R
+++ b/CVE_R/R/cve_sgdrmsprob.R
@ -3,14 +3,16 @@
 #'
 #' @keywords internal
 #' @export
-cve_sgd <- function(X, Y, k,
+cve_sgdrmsprob <- function(X, Y, k,
                    nObs = sqrt(nrow(X)),
                    h = NULL,
-                    tau = 0.01,
-                    tol = 1e-3,
+                    tau = 0.1,
+                    tol = 1e-4,
+                    rho = 0.1,
                    epochs = 50L,
                    batch.size = 16L,
                    attempts = 10L,
+                    epsilon = 1e-7,
                    logger = NULL
 ) {
    # Set `grad` functions environment to enable if to find this environments
@ -72,6 +74,7 @@ cve_sgd <- function(X, Y, k,
            logger(environment())
        }

+        M <- matrix(0, p, q)
        # Repeat `epochs` times
        for (epoch in 1:epochs) {
            # Shuffle batches
@ -87,13 +90,14 @@ cve_sgd <- function(X, Y, k,
                loss <- NULL
                G <- grad(X[batch, ], Y[batch], V, h, loss.out = TRUE)

-                # Cayley transform matrix.
-                A <- (G %*% t(V)) - (V %*% t(G))
-
-                # Apply learning rate `tau`.
-                A.tau <- tau * A
+                # Projectd Gradient.
+                A <- projTangentStiefl(V, G)
+                # Projected element squared gradient.
+                Asq <- projTangentStiefl(V, G * G)
+                # Momentum update.
+                M <- (1 - rho) * Asq + rho * projTangentStiefl(V, M)
                # Parallet transport (on Stiefl manifold) into direction of `G`.
-                V <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)
+                V <- retractStiefl(V - tau.init * A / (sqrt(abs(M)) + epsilon))
            }
            # And the error for the history.
            error <- norm(V.last %*% t(V.last) - V %*% t(V), type = "F")
--- a/CVE_R/R/cve_simple.R
+++ b/CVE_R/R/cve_simple.R
@ -72,9 +72,7 @@ cve_simple <- function(X, Y, k,

        # Call logger with initial values before starting optimization.
        if (is.function(logger)) {
-            epoch <- 0 # Set epoch count to 0 (only relevant for logging).
-            error <- NA
-            logger(environment())
+            logger(0L, attempt, loss, V, tau)
        }

        ## Start optimization loop.
@ -103,7 +101,7 @@ cve_simple <- function(X, Y, k,
                V <- V.tau
                # Call logger last time befor stoping.
                if (is.function(logger)) {
-                    logger(environment())
+                    logger(epoch, attempt, loss, V, tau)
                }
                break()
            }
@ -114,7 +112,7 @@ cve_simple <- function(X, Y, k,

            # Call logger after taking a step.
            if (is.function(logger)) {
-                logger(environment())
+                logger(epoch, attempt, loss, V, tau)
            }

            # Compute gradient at new position.
--- a/CVE_R/R/util.R
+++ b/CVE_R/R/util.R
@ -1,4 +1,4 @@
-#' Samples uniform from the Stiefel Manifold
+#' Samples uniform from the Stiefl Manifold.
 #'
 #' @param p row dim.
 #' @param q col dim.
@ -10,6 +10,48 @@ rStiefl <- function(p, q) {
    return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))))
 }

+#' Retraction to the manifold.
+#'
+#' @param A matrix.
+#' @return `(p, q)` semi-orthogonal matrix, aka element of the Stiefl manifold.
+#' @keywords internal
+#' @export
+retractStiefl <- function(A) {
+    return(qr.Q(qr(A)))
+}
+
+#' Skew-Symmetric matrix computed from `A` as
+#' \eqn{1/2 (A - A^T)}.
+#' @param A Matrix of dim `(p, q)`
+#' @return Skew-Symmetric matrix of dim `(p, p)`.
+#' @keywords internal
+#' @export
+skew <- function(A) {
+    0.5 * (A - t(A))
+}
+
+#' Symmetric matrix computed from `A` as
+#' \eqn{1/2 (A + A^T)}.
+#' @param A Matrix of dim `(p, q)`
+#' @return Symmetric matrix of dim `(p, p)`.
+#' @keywords internal
+#' @export
+sym <- function(A) {
+    0.5 * (A + t(A))
+}
+
+#' Orthogonal Projection onto the tangent space of the stiefl manifold.
+#'
+#' @param V Point on the stiefl manifold.
+#' @param G matrix to be projected onto the tangent space at `V`.
+#' @return `(p, q)` matrix as element of the tangent space at `V`.
+#' @keywords internal
+#' @export
+projTangentStiefl <- function(V, G) {
+    Q <- diag(1, nrow(V)) - V %*% t(V)
+    return(Q %*% G + V %*% skew(t(V) %*% G))
+}
+
 #' Null space basis of given matrix `V`
 #'
 #' @param V `(p, q)` matrix
--- a/CVE_legacy/Test_weigthed_cve.R
+++ b/CVE_legacy/Test_weigthed_cve.R
@ -0,0 +1,113 @@
+
+#C:\Users\Lukas\Desktop\owncloud\Shared\Lukas\CVE
+install.packages("C:/Users/Lukas/Desktop/owncloud/Shared/Lukas/CVE_1.0.tar.gz", repos=NULL, type="source") 
+install.packages(file.choose(), repos=NULL, type="source") 
+
+dim<-12
+N<-100
+s<-0.5
+dat<-creat_sample(rep(1,dim)/sqrt(dim),N,fsquare,0.5)
+test<-cve(Y~.,data=as.data.frame(dat),k=1)
+##############
+#initialize model parameterss
+m<-100 #number of replications in simulation
+dim<-12 #dimension of random variable X
+truedim<-2 #dimension of B=b
+qs<-dim-truedim # dimension of orthogonal complement of B
+b1=c(1,1,1,1,1,1,0,0,0,0,0,0)/sqrt(6)
+b2=c(1,-1,1,-1,1,-1,0,0,0,0,0,0)/sqrt(6)
+b<-cbind(b1,b2)
+#b<-b1
+P<-b%*%t(b)
+sigma=0.5 #error standard deviation
+N<-70 #sample size
+K<-30 #number of arbitrary starting values for curvilinear optimization
+MAXIT<-30 #maximal number of iterations in curvilinear search algorithm
+var_vec<-mat.or.vec(m,12)
+M1_weight<-mat.or.vec(m,13)
+#colnames(M1_weight)<-c('CVE1','CVE2','CVE3','CVE1_Rcpp','CVE2_Rcpp','CVE3_Rcpp','meanMAVE','csMAVE','phd','sir','save','CVE4')
+#link function for M1
+fM1<-function(x){return(x[1]/(0.5+(x[2]+1.5)^2))}
+for (i in 1:m){
+  #generate dat according to M1
+  dat<-creat_sample_nor_nonstand(b,N,fsquare,diag(rep(1,dim)),sigma) 
+  #est sample covariance matrix
+  Sig_est<-est_varmat(dat[,-1])
+  #est trace of sample covariance matrix
+  tr<-var_tr(Sig_est)
+  
+  #calculates Vhat_k for CVE1,CVE2, CVE3 for k=qs
+  CVE1<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0)
+  CVE2<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0)
+  CVE3<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0)
+
+  CVE4<-stiefl_weight_partial_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0)
+  CVE5<-stiefl_weight_partial_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0)
+  CVE6<-stiefl_weight_partial_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0)
+  # 
+  CVE7<-stiefl_weight_full_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0)
+  CVE8<-stiefl_weight_full_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0)
+  CVE9<-stiefl_weight_full_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0)
+  #
+  #
+  var_vec[i,1]<-CVE1$var
+  var_vec[i,2]<-CVE2$var
+  var_vec[i,3]<-CVE3$var
+  
+  var_vec[i,4]<-CVE4$var
+  var_vec[i,5]<-CVE5$var
+  var_vec[i,6]<-CVE6$var
+  # 
+  var_vec[i,7]<-CVE7$var
+  var_vec[i,8]<-CVE8$var
+  var_vec[i,9]<-CVE9$var
+  
+  CVE1$est_base<-fill_base(CVE1$est_base)
+  CVE2$est_base<-fill_base(CVE2$est_base)
+  CVE3$est_base<-fill_base(CVE3$est_base)
+  CVE4$est_base<-fill_base(CVE4$est_base)
+  CVE5$est_base<-fill_base(CVE5$est_base)
+  CVE6$est_base<-fill_base(CVE6$est_base)
+  CVE7$est_base<-fill_base(CVE7$est_base)
+  CVE8$est_base<-fill_base(CVE8$est_base)
+  CVE9$est_base<-fill_base(CVE9$est_base)
+  
+  # calculate distance between true B and estimated B
+  M1_weight[i,1]<-subspace_dist(CVE1$est_base[,1:truedim],b)
+  M1_weight[i,2]<-subspace_dist(CVE2$est_base[,1:truedim],b)
+  M1_weight[i,3]<-subspace_dist(CVE3$est_base[,1:truedim],b)
+  M1_weight[i,4]<-subspace_dist(CVE4$est_base[,1:truedim],b)
+  M1_weight[i,5]<-subspace_dist(CVE5$est_base[,1:truedim],b)
+  M1_weight[i,6]<-subspace_dist(CVE6$est_base[,1:truedim],b)
+  M1_weight[i,7]<-subspace_dist(CVE7$est_base[,1:truedim],b)
+  M1_weight[i,8]<-subspace_dist(CVE8$est_base[,1:truedim],b)
+  M1_weight[i,9]<-subspace_dist(CVE9$est_base[,1:truedim],b)
+
+
+
+  CVE1_Rcpp<-cve(Y~.,data=as.data.frame(dat),k=truedim,nObs=N^0.8,attempts=K,tol=10^(-3),slack=0)[[2]]
+  CVE2_Rcpp<-cve(Y~.,data=as.data.frame(dat),k=truedim,nObs=N^(2/3),attempts=K,tol=10^(-3),slack=0)[[2]]
+  CVE3_Rcpp<-cve(Y~.,data=as.data.frame(dat),k=truedim,nObs=N^0.5,attempts=K,tol=10^(-3),slack=0)[[2]]
+
+  # CVE4_Rcpp<-cve(Y~.,data=as.data.frame(dat),k=truedim,h=h_opt,attempts=K,tol=10^(-3))[[2]]
+  #M1_Rcpp[i,12]<-subspace_dist(CVE4_Rcpp$B,b)
+  
+  var_vec[i,10]<-CVE1_Rcpp$loss
+  var_vec[i,11]<-CVE2_Rcpp$loss
+  var_vec[i,12]<-CVE3_Rcpp$loss
+  
+  #calculate orthogonal complement of Vhat_k
+  #i.e. CVE1$est_base[,1:truedim] is estimator for B with dimension (dim times (dim-qs))
+  
+  
+  M1_weight[i,10]<-subspace_dist(CVE1_Rcpp$B,b)
+  M1_weight[i,11]<-subspace_dist(CVE2_Rcpp$B,b)
+  M1_weight[i,12]<-subspace_dist(CVE3_Rcpp$B,b)
+  #meanMAVE
+  mod_t2<-mave(Y~.,data=as.data.frame(dat),method = 'meanMAVE')
+  M1_weight[i,13]<-subspace_dist(mod_t2$dir[[truedim]],b)
+  
+  print(paste(i,paste('/',m)))
+}
+boxplot(M1_weight[1:(i-1),]/sqrt(2*truedim),names=colnames(M1_weight),ylab='err',main='M1')
+summary(M1_weight[1:(i-1),])
--- a/CVE_legacy/stiefel_weight_package.R
+++ b/CVE_legacy/stiefel_weight_package.R
@ -0,0 +1,222 @@
+LV_weight_partial<-function(V,Xl,dtemp,h,q,Y,grad=T){
+  N<-length(Y)
+  if(is.vector(V)){k<-1}
+  else{k<-length(V[1,])}
+  Xlv<-Xl%*%V
+  d<-dtemp-((Xlv^2)%*%rep(1,k))
+  w<-exp(-0.5*(d/h)^2)#dnorm(d/h)/dnorm(0)
+  w<-matrix(w,N,q)
+  wn<-apply(w,2,sum)#new
+  w<-apply(w,2,column_normalize)
+  mY<-t(w)%*%Y
+  sig<-t(w)%*%(Y^2)-(mY)^2
+  W<-diag(kronecker(t(wn),rep(1,N)))##new
+  if(grad==T){
+    grad<-mat.or.vec(dim,k)
+    tmp1<-(kronecker(sig,rep(1,N))-(as.vector(kronecker(rep(1,q),Y))-kronecker(mY,rep(1,N)))^2)
+    if(k==1){
+      grad_d<- -2*Xl*as.vector(Xlv)
+      grad<-(1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1 #new
+      # wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N))
+      # grad<- wn_grad%*%(sig-rep(var1[2],q))/(sum(wn))+grad
+    }
+    else{
+      for (j in 1:(k)){
+        grad_d<- -2*Xl*as.vector(Xlv[,j])
+        grad[,j]<- (1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1#new
+        # wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N))
+        # grad[,j]<- wn_grad%*%(sig-rep(var1[2],q))/(sum(wn))+grad
+      }
+    }
+    ret<-list(t(wn)%*%sig/sum(wn),sig,grad)#new
+    names(ret)<-c('var','sig','grad')
+  }
+  else{
+    ret<-list(t(wn)%*%sig/sum(wn),sig)#new
+    names(ret)<-c('var','sig')
+  }
+  
+  return(ret)
+}
+######
+LV_weight_full<-function(V,Xl,dtemp,h,q,Y,grad=T){
+  N<-length(Y)
+  if(is.vector(V)){k<-1}
+  else{k<-length(V[1,])}
+  Xlv<-Xl%*%V
+  d<-dtemp-((Xlv^2)%*%rep(1,k))
+  w<-exp(-0.5*(d/h)^2)#dnorm(d/h)/dnorm(0)
+  w<-matrix(w,N,q)
+  wn<-apply(w,2,sum)#new
+  w<-apply(w,2,column_normalize)
+  mY<-t(w)%*%Y
+  sig<-t(w)%*%(Y^2)-(mY)^2
+  W<-(kronecker(t(wn),rep(1,N)))#new or ###diag(kronecker(t(wn),rep(1,N)))
+  var<-t(wn)%*%sig/sum(wn)#new
+  if(grad==T){
+    grad<-mat.or.vec(dim,k)
+    tmp1<-(kronecker(sig,rep(1,N))-(as.vector(kronecker(rep(1,q),Y))-kronecker(mY,rep(1,N)))^2)
+    if(k==1){
+      grad_d<- -2*Xl*as.vector(Xlv)
+      grad<-(1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1#new
+      wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N))#new
+      grad<- wn_grad%*%(sig-rep(var,q))/(sum(wn))+grad#new
+    }
+    else{
+      for (j in 1:(k)){
+        grad_d<- -2*Xl*as.vector(Xlv[,j])
+        grad[,j]<- (1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1#new
+        wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N))#new
+        grad[,j]<- wn_grad%*%(sig-rep(var,q))/(sum(wn))+grad[,j]#new
+      }
+    }
+    ret<-list(var,sig,grad)
+    names(ret)<-c('var','sig','grad')
+  }
+  else{
+    ret<-list(var,sig)
+    names(ret)<-c('var','sig')
+  }
+  
+  return(ret)
+}
+####
+stiefl_weight_partial_opt<-function(dat,h=NULL,k,k0=30,p=1,maxit=50,nObs=sqrt(length(dat[,1])),lambda_0=1,tol=10^(-3),sclack_para=0){
+  Y<-dat[,1]
+  X<-dat[,-1]
+  N<-length(Y)
+  dim<-length(X[1,])
+  if(p<1){
+    S<-est_varmat(X)
+    tmp1<-q_ind(X,S,p)
+    q<-tmp1$q
+    ind<-tmp1$ind
+  }
+  else{
+    q<-N
+    ind<-1:N
+  }
+  Xl<-(kronecker(rep(1,q),X)-kronecker(X[ind,],rep(1,N)))
+  dtemp<-apply(Xl,1,norm2)
+  if(is.null(h)){
+    S<-est_varmat(X)
+    tr<-var_tr(S)
+    h<-choose_h_2(dim,k,N,nObs,tr)
+  }
+  best<-exp(10000)
+  Vend<-mat.or.vec(dim,k)
+  sig<-mat.or.vec(q,1)
+  for(u in 1:k0){
+    Vnew<-Vold<-stiefl_startval(dim,k)
+    #print(Vold)
+    #print(LV(Vold,Xl,dtemp,h,q,Y)$var)
+    Lnew<-Lold<-exp(10000)
+    lambda<-lambda_0
+    err<-10
+    count<-0
+    count2<-0
+    while(err>tol&count<maxit){
+      #print(Vold)
+      tmp2<-LV_weight_partial(Vold,Xl,dtemp,h,q,Y)
+      G<-tmp2$grad
+      Lold<-tmp2$var
+      W<-G%*%t(Vold)-Vold%*%t(G)
+      stepsize<-lambda#/(2*sqrt(count+1))
+      Vnew<-solve(diag(1,dim)+stepsize*W)%*%(diag(1,dim)-stepsize*W)%*%Vold
+      # print(Vnew)
+      tmp3<-LV_weight_partial(Vnew,Xl,dtemp,h,q,Y,grad=F)
+      Lnew<-tmp3$var
+      err<-sqrt(sum((Vold%*%t(Vold)-Vnew%*%t(Vnew))^2))/sqrt(2*k)#sqrt(sum(tmp3$grad^2))/(dim*k)#
+      #print(err)
+      if(((Lnew-Lold)/Lold) > sclack_para){#/(count+1)^(0.5)
+        lambda=lambda/2
+        err<-10
+        count2<-count2+1
+        count<-count-1
+        Vnew<-Vold #!!!!!
+        
+      }
+      Vold<-Vnew
+      count<-count+1
+      #print(count)
+    }
+    if(best>Lnew){
+      best<-Lnew
+      Vend<-Vnew
+      sig<-tmp3$sig
+    }
+  }
+  ret<-list(Vend,best,sig,count,h,count2)
+  names(ret)<-c('est_base','var','aov_dat','count','h','count2') 
+  return(ret)
+}
+###########
+stiefl_weight_full_opt<-function(dat,h=NULL,k,k0=30,p=1,maxit=50,nObs=sqrt(length(dat[,1])),lambda_0=1,tol=10^(-3),sclack_para=0){
+  Y<-dat[,1]
+  X<-dat[,-1]
+  N<-length(Y)
+  dim<-length(X[1,])
+  if(p<1){
+    S<-est_varmat(X)
+    tmp1<-q_ind(X,S,p)
+    q<-tmp1$q
+    ind<-tmp1$ind
+  }
+  else{
+    q<-N
+    ind<-1:N
+  }
+  Xl<-(kronecker(rep(1,q),X)-kronecker(X[ind,],rep(1,N)))
+  dtemp<-apply(Xl,1,norm2)
+  if(is.null(h)){
+    S<-est_varmat(X)
+    tr<-var_tr(S)
+    h<-choose_h_2(dim,k,N,nObs,tr)
+  }
+  best<-exp(10000)
+  Vend<-mat.or.vec(dim,k)
+  sig<-mat.or.vec(q,1)
+  for(u in 1:k0){
+    Vnew<-Vold<-stiefl_startval(dim,k)
+    #print(Vold)
+    #print(LV(Vold,Xl,dtemp,h,q,Y)$var)
+    Lnew<-Lold<-exp(10000)
+    lambda<-lambda_0
+    err<-10
+    count<-0
+    count2<-0
+    while(err>tol&count<maxit){
+      #print(Vold)
+      tmp2<-LV_weight_full(Vold,Xl,dtemp,h,q,Y)
+      G<-tmp2$grad
+      Lold<-tmp2$var
+      W<-G%*%t(Vold)-Vold%*%t(G)
+      stepsize<-lambda#/(2*sqrt(count+1))
+      Vnew<-solve(diag(1,dim)+stepsize*W)%*%(diag(1,dim)-stepsize*W)%*%Vold
+      # print(Vnew)
+      tmp3<-LV_weight_full(Vnew,Xl,dtemp,h,q,Y,grad=F)
+      Lnew<-tmp3$var
+      err<-sqrt(sum((Vold%*%t(Vold)-Vnew%*%t(Vnew))^2))/sqrt(2*k)#sqrt(sum(tmp3$grad^2))/(dim*k)#
+      #print(err)
+      if(((Lnew-Lold)/Lold) > sclack_para){#/(count+1)^(0.5)
+        lambda=lambda/2
+        err<-10
+        count2<-count2+1
+        count<-count-1
+        Vnew<-Vold #!!!!!
+        
+      }
+      Vold<-Vnew
+      count<-count+1
+      #print(count)
+    }
+    if(best>Lnew){
+      best<-Lnew
+      Vend<-Vnew
+      sig<-tmp3$sig
+    }
+  }
+  ret<-list(Vend,best,sig,count,h,count2)
+  names(ret)<-c('est_base','var','aov_dat','count','h','count2') 
+  return(ret)
+}
--- a/LaTeX/notes.tex
+++ b/LaTeX/notes.tex
@ -0,0 +1,46 @@
+\documentclass[12pt,a4paper]{article}
+
+\usepackage[utf8]{inputenc}
+\usepackage[T1]{fontenc}
+\usepackage{amsmath, amsfonts, amssymb, amsthm}
+\usepackage{fullpage}
+
+\newcommand{\vecl}{\ensuremath{\operatorname{vec}_l}}
+\newcommand{\Sym}{\ensuremath{\operatorname{Sym}}}
+
+\begin{document}
+
+Indexing a given matrix $A = (a_{ij})_{i,j = 1, ..., n} \in \mathbb{R}^{n\times n}$ given as
+\begin{displaymath}
+    A = \begin{pmatrix}
+        a_{0,0} & a_{0,1} & a_{0,2} & \ldots & a_{0,n-1} \\
+        a_{1,0} & a_{1,1} & a_{1,2} & \ldots & a_{1,n-1} \\
+        a_{2,0} & a_{2,1} & a_{2,2} & \ldots & a_{2,n-1} \\
+        \vdots & \vdots & \vdots & \ddots & \vdots \\
+        a_{n-1,0} & a_{n-1,1} & a_{n-1,2} & \ldots & a_{n-1,n-1}
+    \end{pmatrix}
+\end{displaymath}
+
+A symmetric matrix with zero main diagonal, meaning a matrix $S = S^T$ with $S_{i,i} = 0,\ \forall i = 1,..,n$ is givne in the following form
+\begin{displaymath}
+    S = \begin{pmatrix}
+        0 & s_{1,0} & s_{2,0} & \ldots & s_{n-1,0} \\
+        s_{1,0} & 0 & s_{2,1} & \ldots & s_{n-1,1} \\
+        s_{2,0} & s_{2,1} & 0 & \ldots & s_{n-1,2} \\
+        \vdots & \vdots & \vdots & \ddots & \vdots \\
+        s_{n-1,0} & s_{n-1,1} & s_{n-1,2} & \ldots & 0
+    \end{pmatrix}
+\end{displaymath}
+Therefore its sufficient to store only the lower triangular part, for memory efficiency and some further alrogithmic shortcuts (sometime they are more expencife) the symmetric matrix $S$ is stored in packed form, meanin in a vector of the length $\frac{n(n-1)}{2}$. We use (like for matrices) a column-major order of elements and define the $\vecl:\Sym(n)\to \mathbb{R}^{n(n-1) / 2}$ opperator defined as
+
+\begin{displaymath}
+    \vecl(S) = (s_{1,0}, s_{2,0},\cdots,s_{n-1,0},s_{2,1}\cdots,s_{n-1,n-2})^T
+\end{displaymath}
+
+The relation between the matrix indices $i,j$ and the $\vecl$ index $k$ is given by
+
+\begin{displaymath}
+    (\vecl(S)_k = s_{i,j} \quad\Leftrightarrow\quad k = jn+i) : j \in \{0,...,n-2\} \land j < i < n.
+\end{displaymath}
+
+\end{document}