From 50302ce18aa607b73b3ab711a8508b677cb2fbe3 Mon Sep 17 00:00:00 2001 From: daniel Date: Wed, 25 Sep 2019 14:49:12 +0200 Subject: [PATCH] cleanup --- CVE_C/NAMESPACE | 8 +- CVE_C/R/cve_linesearch.R | 169 ------------- CVE_C/R/gradient.R | 48 ---- CVE_C/R/util.R | 46 +++- CVE_C/man/cve.Rd | 7 +- CVE_C/man/cve_linesearch.Rd | 16 -- CVE_C/man/cve_sgd.Rd | 16 -- CVE_C/man/cve_simple.Rd | 16 -- CVE_C/man/grad.Rd | 31 --- CVE_C/man/projTangentStiefl.Rd | 20 ++ CVE_C/man/rStiefl.Rd | 4 +- CVE_C/man/retractStiefl.Rd | 18 ++ CVE_C/man/skew.Rd | 20 ++ CVE_C/man/sym.Rd | 20 ++ CVE_C/src/cve_simple.c | 2 +- CVE_R/R/CVE.R | 175 ++++++++------ .../R/cve_simple.R => CVE_R/R/cve_momentum.R | 38 ++- CVE_R/R/cve_rcg.R | 179 ++++++++++++++ CVE_R/R/cve_rmsprob.R | 121 ++++++++++ CVE_C/R/cve_sgd.R => CVE_R/R/cve_sgdrmsprob.R | 22 +- CVE_R/R/cve_simple.R | 8 +- CVE_R/R/util.R | 44 +++- CVE_legacy/Test_weigthed_cve.R | 113 +++++++++ CVE_legacy/stiefel_weight_package.R | 222 ++++++++++++++++++ LaTeX/notes.tex | 46 ++++ 25 files changed, 1000 insertions(+), 409 deletions(-) delete mode 100644 CVE_C/R/cve_linesearch.R delete mode 100644 CVE_C/R/gradient.R delete mode 100644 CVE_C/man/cve_linesearch.Rd delete mode 100644 CVE_C/man/cve_sgd.Rd delete mode 100644 CVE_C/man/cve_simple.Rd delete mode 100644 CVE_C/man/grad.Rd create mode 100644 CVE_C/man/projTangentStiefl.Rd create mode 100644 CVE_C/man/retractStiefl.Rd create mode 100644 CVE_C/man/skew.Rd create mode 100644 CVE_C/man/sym.Rd rename CVE_C/R/cve_simple.R => CVE_R/R/cve_momentum.R (82%) create mode 100644 CVE_R/R/cve_rcg.R create mode 100644 CVE_R/R/cve_rmsprob.R rename CVE_C/R/cve_sgd.R => CVE_R/R/cve_sgdrmsprob.R (87%) create mode 100644 CVE_legacy/Test_weigthed_cve.R create mode 100644 CVE_legacy/stiefel_weight_package.R create mode 100644 LaTeX/notes.tex diff --git a/CVE_C/NAMESPACE b/CVE_C/NAMESPACE index 65deac3..365ae9b 100644 --- a/CVE_C/NAMESPACE +++ b/CVE_C/NAMESPACE @@ -5,15 +5,15 @@ S3method(summary,cve) export(cve) export(cve.call) export(cve.grid.search) -export(cve_linesearch) -export(cve_sgd) -export(cve_simple) export(dataset) export(elem.pairs) export(estimate.bandwidth) -export(grad) export(null) +export(projTangentStiefl) export(rStiefl) +export(retractStiefl) +export(skew) +export(sym) import(stats) importFrom(graphics,lines) importFrom(graphics,plot) diff --git a/CVE_C/R/cve_linesearch.R b/CVE_C/R/cve_linesearch.R deleted file mode 100644 index d2eb115..0000000 --- a/CVE_C/R/cve_linesearch.R +++ /dev/null @@ -1,169 +0,0 @@ -#' Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe -#' conditions. -#' -#' @keywords internal -#' @export -cve_linesearch <- function(X, Y, k, - nObs = sqrt(nrow(X)), - h = NULL, - tau = 1.0, - tol = 1e-3, - rho1 = 0.1, - rho2 = 0.9, - slack = 0, - epochs = 50L, - attempts = 10L, - max.linesearch.iter = 10L, - logger = NULL -) { - # Set `grad` functions environment to enable if to find this environments - # local variabels, needed to enable the manipulation of this local variables - # from within `grad`. - environment(grad) <- environment() - - # Get dimensions. - n <- nrow(X) - p <- ncol(X) - q <- p - k - - # Save initial learning rate `tau`. - tau.init <- tau - # Addapt tolearance for break condition. - tol <- sqrt(2 * q) * tol - - # Estaimate bandwidth if not given. - if (missing(h) | !is.numeric(h)) { - h <- estimate.bandwidth(X, k, nObs) - } - - # Compute persistent data. - # Compute lookup indexes for symmetrie, lower/upper - # triangular parts and vectorization. 
- pair.index <- elem.pairs(seq(n)) - i <- pair.index[1, ] # `i` indices of `(i, j)` pairs - j <- pair.index[2, ] # `j` indices of `(i, j)` pairs - # Matrix of vectorized indices. (vec(index) -> seq) - index <- matrix(seq(n * n), n, n) - lower <- index[lower.tri(index)] - upper <- t(index)[lower] - - # Create all pairewise differences of rows of `X`. - X_diff <- X[i, , drop = F] - X[j, , drop = F] - # Identity matrix. - I_p <- diag(1, p) - - # Init tracking of current best (according multiple attempts). - V.best <- NULL - loss.best <- Inf - - # Start loop for multiple attempts. - for (attempt in 1:attempts) { - - # Sample a `(p, q)` dimensional matrix from the stiefel manifold as - # optimization start value. - V <- rStiefl(p, q) - - # Initial loss and gradient. - loss <- Inf - G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE) - # Set last loss (aka, loss after applying the step). - loss.last <- loss - - # Call logger with initial values before starting optimization. - if (is.function(logger)) { - epoch <- 0 # Set epoch count to 0 (only relevant for logging). - error <- NA - logger(environment()) - } - - ## Start optimization loop. - for (epoch in 1:epochs) { - - # Cayley transform matrix `A` - A <- (G %*% t(V)) - (V %*% t(G)) - - # Directional derivative of the loss at current position, given - # as `Tr(G^T \cdot A \cdot V)`. - loss.prime <- -0.5 * norm(A, type = 'F')^2 - - # Linesearch - tau.upper <- Inf - tau.lower <- 0 - tau <- tau.init - for (iter in 1:max.linesearch.iter) { - # Apply learning rate `tau`. - A.tau <- (tau / 2) * A - # Parallet transport (on Stiefl manifold) into direction of `G`. - inv <- solve(I_p + A.tau) - V.tau <- inv %*% ((I_p - A.tau) %*% V) - - # Loss at position after a step. - loss <- Inf # aka loss.tau - G.tau <- grad(X, Y, V.tau, h, loss.out = TRUE, persistent = TRUE) - - # Armijo condition. - if (loss > loss.last + (rho1 * tau * loss.prime)) { - tau.upper <- tau - tau <- (tau.lower + tau.upper) / 2 - next() - } - - V.prime.tau <- -0.5 * inv %*% A %*% (V + V.tau) - loss.prime.tau <- sum(G * V.prime.tau) # Tr(grad(tau)^T \cdot Y^'(tau)) - - # Wolfe condition. - if (loss.prime.tau < rho2 * loss.prime) { - tau.lower <- tau - if (tau.upper == Inf) { - tau <- 2 * tau.lower - } else { - tau <- (tau.lower + tau.upper) / 2 - } - } else { - break() - } - } - - # Compute error. - error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F") - - # Check break condition (epoch check to skip ignored gradient calc). - # Note: the devision by `sqrt(2 * k)` is included in `tol`. - if (error < tol | epoch >= epochs) { - # take last step and stop optimization. - V <- V.tau - # Final call to the logger before stopping optimization - if (is.function(logger)) { - G <- G.tau - logger(environment()) - } - break() - } - - # Perform the step and remember previous loss. - V <- V.tau - loss.last <- loss - G <- G.tau - - # Log after taking current step. - if (is.function(logger)) { - logger(environment()) - } - - } - - # Check if current attempt improved previous ones - if (loss < loss.best) { - loss.best <- loss - V.best <- V - } - - } - - return(list( - loss = loss.best, - V = V.best, - B = null(V.best), - h = h - )) -} diff --git a/CVE_C/R/gradient.R b/CVE_C/R/gradient.R deleted file mode 100644 index 8baaa88..0000000 --- a/CVE_C/R/gradient.R +++ /dev/null @@ -1,48 +0,0 @@ -#' Compute get gradient of `L(V)` given a dataset `X`. -#' -#' @param X Data matrix. -#' @param Y Responce. -#' @param V Position to compute the gradient at, aka point on Stiefl manifold. 
-#' @param h Bandwidth -#' @param loss.out Iff \code{TRUE} loss will be written to parent environment. -#' @param loss.only Boolean to only compute the loss, of \code{TRUE} a single -#' value loss is returned and \code{envir} is ignored. -#' @param persistent Determines if data indices and dependent calculations shall -#' be reused from the parent environment. ATTENTION: Do NOT set this flag, only -#' intended for internal usage by carefully aligned functions! -#' @keywords internal -#' @export -grad <- function(X, Y, V, h, - loss.out = FALSE, - loss.only = FALSE, - persistent = FALSE) { - # Get number of samples and dimension. - n <- nrow(X) - p <- ncol(X) - - if (!persistent) { - # Compute lookup indexes for symmetrie, lower/upper - # triangular parts and vectorization. - pair.index <- elem.pairs(seq(n)) - i <- pair.index[1, ] # `i` indices of `(i, j)` pairs - j <- pair.index[2, ] # `j` indices of `(i, j)` pairs - # Index of vectorized matrix, for lower and upper triangular part. - lower <- ((i - 1) * n) + j - upper <- ((j - 1) * n) + i - - # Create all pairewise differences of rows of `X`. - X_diff <- X[i, , drop = F] - X[j, , drop = F] - } - - out <- .Call("grad_c", PACKAGE = "CVE", - X, X_diff, as.double(Y), V, as.double(h)); - - if (loss.only) { - return(out$loss) - } - if (loss.out) { - loss <<- out$loss - } - - return(out$G) -} diff --git a/CVE_C/R/util.R b/CVE_C/R/util.R index ca7606a..b114fc0 100644 --- a/CVE_C/R/util.R +++ b/CVE_C/R/util.R @@ -1,4 +1,4 @@ -#' Samples uniform from the Stiefel Manifold +#' Samples uniform from the Stiefl Manifold. #' #' @param p row dim. #' @param q col dim. @@ -10,6 +10,48 @@ rStiefl <- function(p, q) { return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q)))) } +#' Retraction to the manifold. +#' +#' @param A matrix. +#' @return `(p, q)` semi-orthogonal matrix, aka element of the Stiefl manifold. +#' @keywords internal +#' @export +retractStiefl <- function(A) { + return(qr.Q(qr(A))) +} + +#' Skew-Symmetric matrix computed from `A` as +#' \eqn{1/2 (A - A^T)}. +#' @param A Matrix of dim `(p, q)` +#' @return Skew-Symmetric matrix of dim `(p, p)`. +#' @keywords internal +#' @export +skew <- function(A) { + 0.5 * (A - t(A)) +} + +#' Symmetric matrix computed from `A` as +#' \eqn{1/2 (A + A^T)}. +#' @param A Matrix of dim `(p, q)` +#' @return Symmetric matrix of dim `(p, p)`. +#' @keywords internal +#' @export +sym <- function(A) { + 0.5 * (A + t(A)) +} + +#' Orthogonal Projection onto the tangent space of the stiefl manifold. +#' +#' @param V Point on the stiefl manifold. +#' @param G matrix to be projected onto the tangent space at `V`. +#' @return `(p, q)` matrix as element of the tangent space at `V`. +#' @keywords internal +#' @export +projTangentStiefl <- function(V, G) { + Q <- diag(1, nrow(V)) - V %*% t(V) + return(Q %*% G + V %*% skew(t(V) %*% G)) +} + #' Null space basis of given matrix `V` #' #' @param V `(p, q)` matrix @@ -19,7 +61,7 @@ rStiefl <- function(p, q) { null <- function(V) { tmp <- qr(V) set <- if(tmp$rank == 0L) seq_len(ncol(V)) else -seq_len(tmp$rank) - return(qr.Q(tmp, complete=TRUE)[, set, drop=FALSE]) + return(qr.Q(tmp, complete = TRUE)[, set, drop = FALSE]) } #' Creates a (numeric) matrix where each column contains diff --git a/CVE_C/man/cve.Rd b/CVE_C/man/cve.Rd index d3e0d05..cb8e03e 100644 --- a/CVE_C/man/cve.Rd +++ b/CVE_C/man/cve.Rd @@ -5,10 +5,11 @@ \alias{cve.call} \title{Implementation of the CVE method.} \usage{ -cve(formula, data, method = "simple", max.dim = 10, ...) 
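# Editor's illustration (not part of the patch): a minimal, runnable sketch of
# what the new Stiefl helpers introduced in CVE_C/R/util.R above guarantee.
# It assumes that file has been sourced; `G` is a made-up stand-in for the
# gradient returned by the C routine.
set.seed(42)
p <- 6; q <- 2
V <- rStiefl(p, q)                      # random point on the Stiefl manifold
G <- matrix(rnorm(p * q), p, q)         # arbitrary "gradient"
D <- projTangentStiefl(V, G)            # projection onto the tangent space at V
stopifnot(max(abs(crossprod(V, D) + crossprod(D, V))) < 1e-12)  # tangent condition
V.new <- retractStiefl(V - 0.1 * D)     # gradient step followed by QR retraction
stopifnot(max(abs(crossprod(V.new) - diag(q))) < 1e-12)         # still semi-orthogonal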
+cve(formula, data, method = "simple", max.dim = 10L, ...) -cve.call(X, Y, method = "simple", nObs = nrow(X)^0.5, min.dim = 1, - max.dim = 10, k, ...) +cve.call(X, Y, method = "simple", nObs = sqrt(nrow(X)), h = NULL, + min.dim = 1L, max.dim = 10L, k = NULL, tau = 1, tol = 0.001, + epochs = 50L, attempts = 10L, logger = NULL) } \arguments{ \item{formula}{Formel for the regression model defining `X`, `Y`. diff --git a/CVE_C/man/cve_linesearch.Rd b/CVE_C/man/cve_linesearch.Rd deleted file mode 100644 index 0413346..0000000 --- a/CVE_C/man/cve_linesearch.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/cve_linesearch.R -\name{cve_linesearch} -\alias{cve_linesearch} -\title{Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe -conditions.} -\usage{ -cve_linesearch(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 1, - tol = 0.001, rho1 = 0.1, rho2 = 0.9, slack = 0, epochs = 50L, - attempts = 10L, max.linesearch.iter = 10L, logger = NULL) -} -\description{ -Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe -conditions. -} -\keyword{internal} diff --git a/CVE_C/man/cve_sgd.Rd b/CVE_C/man/cve_sgd.Rd deleted file mode 100644 index fc00f7d..0000000 --- a/CVE_C/man/cve_sgd.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/cve_sgd.R -\name{cve_sgd} -\alias{cve_sgd} -\title{Simple implementation of the CVE method. 'Simple' means that this method is -a classic GD method unsing no further tricks.} -\usage{ -cve_sgd(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 0.01, - tol = 0.001, epochs = 50L, batch.size = 16L, attempts = 10L, - logger = NULL) -} -\description{ -Simple implementation of the CVE method. 'Simple' means that this method is -a classic GD method unsing no further tricks. -} -\keyword{internal} diff --git a/CVE_C/man/cve_simple.Rd b/CVE_C/man/cve_simple.Rd deleted file mode 100644 index 67d185f..0000000 --- a/CVE_C/man/cve_simple.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/cve_simple.R -\name{cve_simple} -\alias{cve_simple} -\title{Simple implementation of the CVE method. 'Simple' means that this method is -a classic GD method unsing no further tricks.} -\usage{ -cve_simple(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 1, - tol = 0.001, slack = 0, epochs = 50L, attempts = 10L, - logger = NULL) -} -\description{ -Simple implementation of the CVE method. 'Simple' means that this method is -a classic GD method unsing no further tricks. 
-} -\keyword{internal} diff --git a/CVE_C/man/grad.Rd b/CVE_C/man/grad.Rd deleted file mode 100644 index d890e93..0000000 --- a/CVE_C/man/grad.Rd +++ /dev/null @@ -1,31 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gradient.R -\name{grad} -\alias{grad} -\title{Compute get gradient of `L(V)` given a dataset `X`.} -\usage{ -grad(X, Y, V, h, loss.out = FALSE, loss.only = FALSE, - persistent = FALSE) -} -\arguments{ -\item{X}{Data matrix.} - -\item{Y}{Responce.} - -\item{V}{Position to compute the gradient at, aka point on Stiefl manifold.} - -\item{h}{Bandwidth} - -\item{loss.out}{Iff \code{TRUE} loss will be written to parent environment.} - -\item{loss.only}{Boolean to only compute the loss, of \code{TRUE} a single -value loss is returned and \code{envir} is ignored.} - -\item{persistent}{Determines if data indices and dependent calculations shall -be reused from the parent environment. ATTENTION: Do NOT set this flag, only -intended for internal usage by carefully aligned functions!} -} -\description{ -Compute get gradient of `L(V)` given a dataset `X`. -} -\keyword{internal} diff --git a/CVE_C/man/projTangentStiefl.Rd b/CVE_C/man/projTangentStiefl.Rd new file mode 100644 index 0000000..bce4af5 --- /dev/null +++ b/CVE_C/man/projTangentStiefl.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/util.R +\name{projTangentStiefl} +\alias{projTangentStiefl} +\title{Orthogonal Projection onto the tangent space of the stiefl manifold.} +\usage{ +projTangentStiefl(V, G) +} +\arguments{ +\item{V}{Point on the stiefl manifold.} + +\item{G}{matrix to be projected onto the tangent space at `V`.} +} +\value{ +`(p, q)` matrix as element of the tangent space at `V`. +} +\description{ +Orthogonal Projection onto the tangent space of the stiefl manifold. +} +\keyword{internal} diff --git a/CVE_C/man/rStiefl.Rd b/CVE_C/man/rStiefl.Rd index 31c40bb..fe7ae5d 100644 --- a/CVE_C/man/rStiefl.Rd +++ b/CVE_C/man/rStiefl.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/util.R \name{rStiefl} \alias{rStiefl} -\title{Samples uniform from the Stiefel Manifold} +\title{Samples uniform from the Stiefl Manifold.} \usage{ rStiefl(p, q) } @@ -15,7 +15,7 @@ rStiefl(p, q) `(p, q)` semi-orthogonal matrix } \description{ -Samples uniform from the Stiefel Manifold +Samples uniform from the Stiefl Manifold. } \examples{ V <- rStiefel(6, 4) diff --git a/CVE_C/man/retractStiefl.Rd b/CVE_C/man/retractStiefl.Rd new file mode 100644 index 0000000..ac04876 --- /dev/null +++ b/CVE_C/man/retractStiefl.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/util.R +\name{retractStiefl} +\alias{retractStiefl} +\title{Retraction to the manifold.} +\usage{ +retractStiefl(A) +} +\arguments{ +\item{A}{matrix.} +} +\value{ +`(p, q)` semi-orthogonal matrix, aka element of the Stiefl manifold. +} +\description{ +Retraction to the manifold. +} +\keyword{internal} diff --git a/CVE_C/man/skew.Rd b/CVE_C/man/skew.Rd new file mode 100644 index 0000000..9caf669 --- /dev/null +++ b/CVE_C/man/skew.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/util.R +\name{skew} +\alias{skew} +\title{Skew-Symmetric matrix computed from `A` as +\eqn{1/2 (A - A^T)}.} +\usage{ +skew(A) +} +\arguments{ +\item{A}{Matrix of dim `(p, q)`} +} +\value{ +Skew-Symmetric matrix of dim `(p, p)`. +} +\description{ +Skew-Symmetric matrix computed from `A` as +\eqn{1/2 (A - A^T)}. 
+} +\keyword{internal} diff --git a/CVE_C/man/sym.Rd b/CVE_C/man/sym.Rd new file mode 100644 index 0000000..3d1d407 --- /dev/null +++ b/CVE_C/man/sym.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/util.R +\name{sym} +\alias{sym} +\title{Symmetric matrix computed from `A` as +\eqn{1/2 (A + A^T)}.} +\usage{ +sym(A) +} +\arguments{ +\item{A}{Matrix of dim `(p, q)`} +} +\value{ +Symmetric matrix of dim `(p, p)`. +} +\description{ +Symmetric matrix computed from `A` as +\eqn{1/2 (A + A^T)}. +} +\keyword{internal} diff --git a/CVE_C/src/cve_simple.c b/CVE_C/src/cve_simple.c index 70f9adb..e57a0b4 100644 --- a/CVE_C/src/cve_simple.c +++ b/CVE_C/src/cve_simple.c @@ -12,7 +12,7 @@ void cve_simple_sub(const int n, const int p, const int q, double *V, double *L, SEXP logger, SEXP loggerEnv) { - int attempt, epoch, i, j, k, nn = (n * (n - 1)) / 2; + int attempt, epoch, i, nn = (n * (n - 1)) / 2; double loss, loss_last, loss_best, err, tau; double tol = tol_init * sqrt((double)(2 * q)); double gKscale = -0.5 / h; diff --git a/CVE_R/R/CVE.R b/CVE_R/R/CVE.R index 773d62c..df42ed6 100644 --- a/CVE_R/R/CVE.R +++ b/CVE_R/R/CVE.R @@ -10,6 +10,7 @@ #' #' @docType package #' @author Loki +#' @useDynLib CVE, .registration = TRUE "_PACKAGE" #' Implementation of the CVE method. @@ -54,21 +55,21 @@ #' @import stats #' @importFrom stats model.frame #' @export -cve <- function(formula, data, method = "simple", max.dim = 10, ...) { +cve <- function(formula, data, method = "simple", max.dim = 10L, ...) { # check for type of `data` if supplied and set default if (missing(data)) { data <- environment(formula) } else if (!is.data.frame(data)) { - stop('Parameter `data` must be a `data.frame` or missing.') + stop("Parameter 'data' must be a 'data.frame' or missing.") } # extract `X`, `Y` from `formula` with `data` model <- stats::model.frame(formula, data) - X <- as.matrix(model[,-1, drop = FALSE]) - Y <- as.matrix(model[, 1, drop = FALSE]) + X <- as.matrix(model[ ,-1L, drop = FALSE]) + Y <- as.double(model[ , 1L]) # pass extracted data on to [cve.call()] - dr <- cve.call(X, Y, method = method, ...) + dr <- cve.call(X, Y, method = method, max.dim = max.dim, ...) # overwrite `call` property from [cve.call()] dr$call <- match.call() @@ -83,35 +84,81 @@ cve <- function(formula, data, method = "simple", max.dim = 10, ...) { #' @param ... Method specific parameters. #' @rdname cve #' @export -cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5, - min.dim = 1, max.dim = 10, k, ...) 
{ +cve.call <- function(X, Y, method = "simple", + nObs = sqrt(nrow(X)), h = NULL, + min.dim = 1L, max.dim = 10L, k = NULL, + tau = 1.0, tol = 1e-3, + epochs = 50L, attempts = 10L, + logger = NULL) { # parameter checking - if (!is.matrix(X)) { - stop('X should be a matrices.') + if (!(is.matrix(X) && is.numeric(X))) { + stop("Parameter 'X' should be a numeric matrices.") } - if (is.matrix(Y)) { - Y <- as.vector(Y) + if (!is.numeric(Y)) { + stop("Parameter 'Y' must be numeric.") + } + if (is.matrix(Y) || !is.double(Y)) { + Y <- as.double(Y) } if (nrow(X) != length(Y)) { - stop('Rows of X and number of Y elements are not compatible.') + stop("Rows of 'X' and 'Y' elements are not compatible.") } if (ncol(X) < 2) { - stop('X is one dimensional, no need for dimension reduction.') + stop("'X' is one dimensional, no need for dimension reduction.") } - if (!missing(k)) { - min.dim <- as.integer(k) - max.dim <- as.integer(k) - } else { + if (missing(k) || is.null(k)) { min.dim <- as.integer(min.dim) max.dim <- as.integer(min(max.dim, ncol(X) - 1L)) + } else { + min.dim <- as.integer(k) + max.dim <- as.integer(k) } if (min.dim > max.dim) { - stop('`min.dim` bigger `max.dim`.') + stop("'min.dim' bigger 'max.dim'.") } if (max.dim >= ncol(X)) { - stop('`max.dim` must be smaller than `ncol(X)`.') + stop("'max.dim' (or 'k') must be smaller than 'ncol(X)'.") + } + + if (is.function(h)) { + estimate.bandwidth <- h + h <- NULL + } + + if (!is.numeric(tau) || length(tau) > 1L || tau <= 0.0) { + stop("Initial step-width 'tau' must be positive number.") + } else { + tau <- as.double(tau) + } + if (!is.numeric(tol) || length(tol) > 1L || tol < 0.0) { + stop("Break condition tolerance 'tol' must be not negative number.") + } else { + tol <- as.double(tol) + } + + if (!is.numeric(epochs) || length(epochs) > 1L) { + stop("Parameter 'epochs' must be positive integer.") + } else if (!is.integer(epochs)) { + epochs <- as.integer(epochs) + } + if (epochs < 1L) { + stop("Parameter 'epochs' must be at least 1L.") + } + if (!is.numeric(attempts) || length(attempts) > 1L) { + stop("Parameter 'attempts' must be positive integer.") + } else if (!is.integer(attempts)) { + attempts <- as.integer(attempts) + } + if (attempts < 1L) { + stop("Parameter 'attempts' must be at least 1L.") + } + + if (is.function(logger)) { + loggerEnv <- environment(logger) + } else { + loggerEnv <- NULL } # Call specified method. @@ -119,15 +166,40 @@ cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5, call <- match.call() dr <- list() for (k in min.dim:max.dim) { + + if (missing(h) || is.null(h)) { + h <- estimate.bandwidth(X, k, nObs) + } else if (is.numeric(h) && h > 0.0) { + h <- as.double(h) + } else { + stop("Bandwidth 'h' must be positive numeric.") + } + if (method == 'simple') { - dr.k <- cve_simple(X, Y, k, nObs = nObs, ...) - } else if (method == 'linesearch') { - dr.k <- cve_linesearch(X, Y, k, nObs = nObs, ...) - } else if (method == 'sgd') { - dr.k <- cve_sgd(X, Y, k, nObs = nObs, ...) + dr.k <- .Call('cve_simple', PACKAGE = 'CVE', + X, Y, k, h, + tau, tol, + epochs, attempts, + logger, loggerEnv) + # dr.k <- cve_simple(X, Y, k, nObs = nObs, ...) + # } else if (method == 'linesearch') { + # dr.k <- cve_linesearch(X, Y, k, nObs = nObs, ...) + # } else if (method == 'rcg') { + # dr.k <- cve_rcg(X, Y, k, nObs = nObs, ...) + # } else if (method == 'momentum') { + # dr.k <- cve_momentum(X, Y, k, nObs = nObs, ...) + # } else if (method == 'rmsprob') { + # dr.k <- cve_rmsprob(X, Y, k, nObs = nObs, ...) 
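# Editor's illustration (not part of the patch): a hedged usage sketch of the
# reworked cve.call() interface above, assuming the package (including the
# compiled 'cve_simple' routine) is installed and loaded; the data and the
# single reduction direction below are made up for the example.
#
#   set.seed(1)
#   n <- 100; p <- 5
#   X <- matrix(rnorm(n * p), n, p)
#   Y <- drop((X %*% rep(1 / sqrt(p), p))^2) + rnorm(n, sd = 0.25)
#   dr <- cve.call(X, Y, method = "simple", k = 1L,
#                  epochs = 20L, attempts = 5L, tau = 1.0, tol = 1e-3)
#   dr[[1]]$B      # estimated (p x 1) basis of the SDR space
#   dr[[1]]$loss   # mean of the losses L returned by the C routine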
+ # } else if (method == 'sgdrmsprob') { + # dr.k <- cve_sgdrmsprob(X, Y, k, nObs = nObs, ...) + # } else if (method == 'sgd') { + # dr.k <- cve_sgd(X, Y, k, nObs = nObs, ...) } else { stop('Got unknown method.') } + dr.k$B <- null(dr.k$V) + dr.k$loss <- mean(dr.k$L) + dr.k$h <- h dr.k$k <- k class(dr.k) <- "cve.k" dr[[k]] <- dr.k @@ -140,11 +212,6 @@ cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5, return(dr) } -# TODO: write summary -# summary.cve <- function() { -# # code # -# } - #' Ploting helper for objects of class \code{cve}. #' #' @param x Object of class \code{cve} (result of [cve()]). @@ -164,43 +231,19 @@ cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5, #' @method plot cve #' @export plot.cve <- function(x, ...) { - - - # H <- x$history - # H_1 <- H[!is.na(H[, 1]), 1] - - # defaults <- list( - # main = "History", - # xlab = "Iterations i", - # ylab = expression(loss == L[n](V^{(i)})), - # xlim = c(1, nrow(H)), - # ylim = c(0, max(H)), - # type = "l" - # ) - - # call.plot <- match.call() - # keys <- names(defaults) - # keys <- keys[match(keys, names(call.plot)[-1], nomatch = 0) == 0] - - # for (key in keys) { - # call.plot[[key]] <- defaults[[key]] - # } - - # call.plot[[1L]] <- quote(plot) - # call.plot$x <- quote(1:length(H_1)) - # call.plot$y <- quote(H_1) - - # eval(call.plot) - - # if (ncol(H) > 1) { - # for (i in 2:ncol(H)) { - # H_i <- H[H[, i] > 0, i] - # lines(1:length(H_i), H_i) - # } - # } - # x.ends <- apply(H, 2, function(h) { length(h[!is.na(h)]) }) - # y.ends <- apply(H, 2, function(h) { tail(h[!is.na(h)], n=1) }) - # points(x.ends, y.ends) + L <- c() + k <- c() + for (dr.k in x) { + if (class(dr.k) == 'cve.k') { + k <- c(k, paste0(dr.k$k)) + L <- c(L, dr.k$L) + } + } + L <- matrix(L, ncol = length(k)) + boxplot(L, main = "Loss ...", + xlab = "SDR dimension k", + ylab = expression(L(V, X[i])), + names = k) } #' Prints a summary of a \code{cve} result. diff --git a/CVE_C/R/cve_simple.R b/CVE_R/R/cve_momentum.R similarity index 82% rename from CVE_C/R/cve_simple.R rename to CVE_R/R/cve_momentum.R index 138751e..c87df10 100644 --- a/CVE_C/R/cve_simple.R +++ b/CVE_R/R/cve_momentum.R @@ -1,17 +1,19 @@ -#' Simple implementation of the CVE method. 'Simple' means that this method is -#' a classic GD method unsing no further tricks. +#' Implementation of the CVE method as a Riemann Conjugated Gradient method. #' +#' @references A Riemannian Conjugate Gradient Algorithm with Implicit Vector +#' Transport for Optimization on the Stiefel Manifold #' @keywords internal #' @export -cve_simple <- function(X, Y, k, - nObs = sqrt(nrow(X)), - h = NULL, - tau = 1.0, - tol = 1e-3, - slack = 0, - epochs = 50L, - attempts = 10L, - logger = NULL +cve_momentum <- function(X, Y, k, + nObs = sqrt(nrow(X)), + h = NULL, + tau = 1.0, + tol = 1e-4, + rho = 0.1, # Momentum update. + slack = 0, + epochs = 50L, + attempts = 10L, + logger = NULL ) { # Set `grad` functions environment to enable if to find this environments # local variabels, needed to enable the manipulation of this local variables @@ -67,9 +69,6 @@ cve_simple <- function(X, Y, k, # Set last loss (aka, loss after applying the step). loss.last <- loss - # Cayley transform matrix `A` - A <- (G %*% t(V)) - (V %*% t(G)) - # Call logger with initial values before starting optimization. if (is.function(logger)) { epoch <- 0 # Set epoch count to 0 (only relevant for logging). @@ -77,12 +76,15 @@ cve_simple <- function(X, Y, k, logger(environment()) } + M <- matrix(0, p, q) ## Start optimization loop. 
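# Editor's note (illustration, not part of the patch): the loop below replaces
# the Cayley update of cve_simple by a momentum step on the Stiefl manifold;
# the buffer M is re-projected onto the tangent space at the current V before
# being mixed with the new projected gradient, and a QR retraction maps the
# result back onto the manifold. The same update as a standalone sketch
# (helpers as defined in R/util.R):
momentum.step <- function(V, G, M, tau, rho) {
    A <- projTangentStiefl(V, G)               # projected gradient at V
    M <- A + rho * projTangentStiefl(V, M)     # transport old momentum, mix in A
    list(V = retractStiefl(V - tau * M),       # descend, then retract via QR
         M = M)
}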
for (epoch in 1:epochs) { # Apply learning rate `tau`. - A.tau <- tau * A + A <- projTangentStiefl(V, G) + # Momentum update. + M <- A + rho * projTangentStiefl(V, M) # Parallet transport (on Stiefl manifold) into direction of `G`. - V.tau <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V) + V.tau <- retractStiefl(V - tau * M) # Loss at position after a step. loss <- grad(X, Y, V.tau, h, loss.only = TRUE, persistent = TRUE) @@ -119,9 +121,6 @@ cve_simple <- function(X, Y, k, # Compute gradient at new position. G <- grad(X, Y, V, h, persistent = TRUE) - - # Cayley transform matrix `A` - A <- (G %*% t(V)) - (V %*% t(G)) } # Check if current attempt improved previous ones @@ -129,7 +128,6 @@ cve_simple <- function(X, Y, k, loss.best <- loss V.best <- V } - } return(list( diff --git a/CVE_R/R/cve_rcg.R b/CVE_R/R/cve_rcg.R new file mode 100644 index 0000000..f1cc8fa --- /dev/null +++ b/CVE_R/R/cve_rcg.R @@ -0,0 +1,179 @@ +#' Implementation of the CVE method as a Riemann Conjugated Gradient method. +#' +#' @references A Riemannian Conjugate Gradient Algorithm with Implicit Vector +#' Transport for Optimization on the Stiefel Manifold +#' @keywords internal +#' @export +cve_rcg <- function(X, Y, k, + nObs = sqrt(nrow(X)), + h = NULL, + tau = 1.0, + tol = 1e-4, + rho = 1e-4, # For Armijo condition. + slack = 0, + epochs = 50L, + attempts = 10L, + max.linesearch.iter = 20L, + logger = NULL +) { + # Set `grad` functions environment to enable if to find this environments + # local variabels, needed to enable the manipulation of this local variables + # from within `grad`. + environment(grad) <- environment() + + # Get dimensions. + n <- nrow(X) # Number of samples. + p <- ncol(X) # Data dimensions + q <- p - k # Complement dimension of the SDR space. + + # Save initial learning rate `tau`. + tau.init <- tau + # Addapt tolearance for break condition. + tol <- sqrt(2 * q) * tol + + # Estaimate bandwidth if not given. + if (missing(h) || !is.numeric(h)) { + h <- estimate.bandwidth(X, k, nObs) + } + + # Compute persistent data. + # Compute lookup indexes for symmetrie, lower/upper + # triangular parts and vectorization. + pair.index <- elem.pairs(seq(n)) + i <- pair.index[1, ] # `i` indices of `(i, j)` pairs + j <- pair.index[2, ] # `j` indices of `(i, j)` pairs + # Index of vectorized matrix, for lower and upper triangular part. + lower <- ((i - 1) * n) + j + upper <- ((j - 1) * n) + i + + # Create all pairewise differences of rows of `X`. + X_diff <- X[i, , drop = F] - X[j, , drop = F] + # Identity matrix. + I_p <- diag(1, p) + + # Init tracking of current best (according multiple attempts). + V.best <- NULL + loss.best <- Inf + + # Start loop for multiple attempts. + for (attempt in 1:attempts) { + # Reset learning rate `tau`. + tau <- tau.init + + # Sample a `(p, q)` dimensional matrix from the stiefel manifold as + # optimization start value. + V <- rStiefl(p, q) + + # Initial loss and gradient. + loss <- Inf + G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE) + # Set last loss (aka, loss after applying the step). + loss.last <- loss + + # Cayley transform matrix `A` + A <- (G %*% t(V)) - (V %*% t(G)) + A.last <- A + + W <- -A + Z <- W %*% V + + # Compute directional derivative. + loss.prime <- sum(G * Z) # Tr(G^T Z) + + # Call logger with initial values before starting optimization. + if (is.function(logger)) { + epoch <- 0 # Set epoch count to 0 (only relevant for logging). + error <- NA + logger(environment()) + } + + ## Start optimization loop. 
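# Editor's note (illustration, not part of the patch): the step-size selection
# inside the loop below is plain backtracking under an Armijo condition along
# the conjugate direction Z; tau is halved until the loss at the retracted
# trial point drops below a linear model of the descent. The same logic as a
# standalone sketch, where `f` is any loss as a function of V and `loss.prime`
# the directional derivative <G, Z> at the current iterate:
armijo.backtrack <- function(f, V, Z, loss.last, loss.prime,
                             tau = 1.0, rho = 1e-4, max.iter = 20L) {
    for (iter in seq_len(max.iter)) {
        V.tau <- retractStiefl(V + tau * Z)     # trial point on the manifold
        loss <- f(V.tau)
        if (loss <= loss.last + rho * tau * loss.prime)
            break                               # sufficient decrease reached
        tau <- tau / 2                          # otherwise shrink the step
    }
    list(V = V.tau, loss = loss, tau = tau)
}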
+ for (epoch in 1:epochs) { + # New directional derivative. + loss.prime <- sum(G * Z) + + # Reset `tau` for step-size selection. + tau <- tau.init + for (iter in 1:max.linesearch.iter) { + V.tau <- retractStiefl(V + tau * Z) + # Loss at position after a step. + loss <- grad(X, Y, V.tau, h, + loss.only = TRUE, persistent = TRUE) + # Check Armijo condition. + if (loss <= loss.last + (rho * tau * loss.prime)) { + break() # Iff fulfilled stop linesearch. + } + # Reduce step-size and continue linesearch. + tau <- tau / 2 + } + + # Compute error. + error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F") + + # Perform step with found step-size + V <- V.tau + loss.last <- loss + + # Call logger. + if (is.function(logger)) { + logger(environment()) + } + + # Check break condition. + # Note: the devision by `sqrt(2 * k)` is included in `tol`. + if (error < tol) { + break() + } + + # Compute Gradient at new position. + G <- grad(X, Y, V, h, persistent = TRUE) + # Store last `A` for `beta` computation. + A.last <- A + # Cayley transform matrix `A` + A <- (G %*% t(V)) - (V %*% t(G)) + + # Check 2. break condition. + if (norm(A, type = 'F') < tol) { + break() + } + + # New directional derivative. + loss.prime <- sum(G * Z) + + # Reset beta if needed. + if (loss.prime < 0) { + # Compute `beta` as describet in paper. + beta.FR <- (norm(A, type = 'F') / norm(A.last, type = 'F'))^2 + beta.PR <- sum(A * (A - A.last)) / norm(A.last, type = 'F')^2 + if (beta.PR < -beta.FR) { + beta <- -beta.FR + } else if (abs(beta.PR) < beta.FR) { + beta <- beta.PR + } else if (beta.PR > beta.FR) { + beta <- beta.FR + } else { + beta <- 0 + } + } else { + beta <- 0 + } + + # Update direction. + W <- -A + beta * W + Z <- W %*% V + } + + # Check if current attempt improved previous ones + if (loss < loss.best) { + loss.best <- loss + V.best <- V + } + } + + return(list( + loss = loss.best, + V = V.best, + B = null(V.best), + h = h + )) +} diff --git a/CVE_R/R/cve_rmsprob.R b/CVE_R/R/cve_rmsprob.R new file mode 100644 index 0000000..10b0a7c --- /dev/null +++ b/CVE_R/R/cve_rmsprob.R @@ -0,0 +1,121 @@ +#' Implementation of the CVE method as a Riemann Conjugated Gradient method. +#' +#' @references A Riemannian Conjugate Gradient Algorithm with Implicit Vector +#' Transport for Optimization on the Stiefel Manifold +#' @keywords internal +#' @export +cve_rmsprob <- function(X, Y, k, + nObs = sqrt(nrow(X)), + h = NULL, + tau = 0.1, + tol = 1e-4, + rho = 0.1, # Momentum update. + slack = 0, + epochs = 50L, + attempts = 10L, + epsilon = 1e-7, + max.linesearch.iter = 20L, + logger = NULL +) { + # Set `grad` functions environment to enable if to find this environments + # local variabels, needed to enable the manipulation of this local variables + # from within `grad`. + environment(grad) <- environment() + + # Get dimensions. + n <- nrow(X) # Number of samples. + p <- ncol(X) # Data dimensions + q <- p - k # Complement dimension of the SDR space. + + # Save initial learning rate `tau`. + tau.init <- tau + # Addapt tolearance for break condition. + tol <- sqrt(2 * q) * tol + + # Estaimate bandwidth if not given. + if (missing(h) || !is.numeric(h)) { + h <- estimate.bandwidth(X, k, nObs) + } + + # Compute persistent data. + # Compute lookup indexes for symmetrie, lower/upper + # triangular parts and vectorization. + pair.index <- elem.pairs(seq(n)) + i <- pair.index[1, ] # `i` indices of `(i, j)` pairs + j <- pair.index[2, ] # `j` indices of `(i, j)` pairs + # Index of vectorized matrix, for lower and upper triangular part. 
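# Editor's note (illustration, not part of the patch): the two lookup vectors
# built right below address the strictly lower and the mirrored upper
# triangular entries of an n x n matrix through its column-major
# vectorization; for a pair i < j, (i - 1) * n + j is the position of element
# (j, i) and (j - 1) * n + i that of element (i, j). A standalone sketch of
# the same construction, using combn() in place of elem.pairs() (which may
# order the pairs differently):
vec.tri.indices <- function(n) {
    pairs <- combn(n, 2)                  # all index pairs (i, j) with i < j
    i <- pairs[1, ]; j <- pairs[2, ]
    list(lower = ((i - 1) * n) + j,       # strictly lower triangular positions
         upper = ((j - 1) * n) + i)       # mirrored positions above the diagonal
}
# e.g. for n = 4: lower = 2 3 4 7 8 12, upper = 5 9 13 10 14 15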
+ lower <- ((i - 1) * n) + j + upper <- ((j - 1) * n) + i + + # Create all pairewise differences of rows of `X`. + X_diff <- X[i, , drop = F] - X[j, , drop = F] + # Identity matrix. + I_p <- diag(1, p) + + # Init tracking of current best (according multiple attempts). + V.best <- NULL + loss.best <- Inf + + # Start loop for multiple attempts. + for (attempt in 1:attempts) { + # Sample a `(p, q)` dimensional matrix from the stiefel manifold as + # optimization start value. + V <- rStiefl(p, q) + + # Call logger with initial values before starting optimization. + if (is.function(logger)) { + loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE) + epoch <- 0 # Set epoch count to 0 (only relevant for logging). + error <- NA + logger(environment()) + } + + M <- matrix(0, p, q) + ## Start optimization loop. + for (epoch in 1:epochs) { + # Compute gradient and loss at current position. + loss <- Inf + G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE) + # Projectd Gradient. + A <- projTangentStiefl(V, G) + # Projected element squared gradient. + Asq <- projTangentStiefl(V, G * G) + # Momentum update. + M <- (1 - rho) * Asq + rho * projTangentStiefl(V, M) + # Parallet transport (on Stiefl manifold) into direction of `G`. + V.tau <- retractStiefl(V - tau.init * A / (sqrt(abs(M)) + epsilon)) + + # Compute error. + error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F") + + # Perform step. + V <- V.tau + + # Call logger after taking a step. + if (is.function(logger)) { + # Set tau to an step size estimate (only for logging) + tau <- tau.init / mean(sqrt(abs(M)) + epsilon) + logger(environment()) + } + + # Check break condition. + # Note: the devision by `sqrt(2 * k)` is included in `tol`. + if (error < tol) { + break() + } + } + + # Check if current attempt improved previous ones + if (loss < loss.best) { + loss.best <- loss + V.best <- V + } + } + + return(list( + loss = loss.best, + V = V.best, + B = null(V.best), + h = h + )) +} diff --git a/CVE_C/R/cve_sgd.R b/CVE_R/R/cve_sgdrmsprob.R similarity index 87% rename from CVE_C/R/cve_sgd.R rename to CVE_R/R/cve_sgdrmsprob.R index 3fee47f..98ee20f 100644 --- a/CVE_C/R/cve_sgd.R +++ b/CVE_R/R/cve_sgdrmsprob.R @@ -3,14 +3,16 @@ #' #' @keywords internal #' @export -cve_sgd <- function(X, Y, k, +cve_sgdrmsprob <- function(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, - tau = 0.01, - tol = 1e-3, + tau = 0.1, + tol = 1e-4, + rho = 0.1, epochs = 50L, batch.size = 16L, attempts = 10L, + epsilon = 1e-7, logger = NULL ) { # Set `grad` functions environment to enable if to find this environments @@ -72,6 +74,7 @@ cve_sgd <- function(X, Y, k, logger(environment()) } + M <- matrix(0, p, q) # Repeat `epochs` times for (epoch in 1:epochs) { # Shuffle batches @@ -87,13 +90,14 @@ cve_sgd <- function(X, Y, k, loss <- NULL G <- grad(X[batch, ], Y[batch], V, h, loss.out = TRUE) - # Cayley transform matrix. - A <- (G %*% t(V)) - (V %*% t(G)) - - # Apply learning rate `tau`. - A.tau <- tau * A + # Projectd Gradient. + A <- projTangentStiefl(V, G) + # Projected element squared gradient. + Asq <- projTangentStiefl(V, G * G) + # Momentum update. + M <- (1 - rho) * Asq + rho * projTangentStiefl(V, M) # Parallet transport (on Stiefl manifold) into direction of `G`. - V <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V) + V <- retractStiefl(V - tau.init * A / (sqrt(abs(M)) + epsilon)) } # And the error for the history. 
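# Editor's note (illustration, not part of the patch): the error computed
# below, and the break criterion used in the other cve_* variants, compares
# iterates through their projection matrices V V', so it measures the distance
# between the spanned subspaces and is invariant under orthogonal changes of
# basis V -> V O. As a standalone sketch:
subspace.error <- function(V1, V2) {
    norm(tcrossprod(V1) - tcrossprod(V2), type = "F")   # || V1 V1' - V2 V2' ||_F
}
# e.g. subspace.error(V, V %*% O) is numerically zero for any orthogonal O.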
error <- norm(V.last %*% t(V.last) - V %*% t(V), type = "F") diff --git a/CVE_R/R/cve_simple.R b/CVE_R/R/cve_simple.R index 138751e..6526a70 100644 --- a/CVE_R/R/cve_simple.R +++ b/CVE_R/R/cve_simple.R @@ -72,9 +72,7 @@ cve_simple <- function(X, Y, k, # Call logger with initial values before starting optimization. if (is.function(logger)) { - epoch <- 0 # Set epoch count to 0 (only relevant for logging). - error <- NA - logger(environment()) + logger(0L, attempt, loss, V, tau) } ## Start optimization loop. @@ -103,7 +101,7 @@ cve_simple <- function(X, Y, k, V <- V.tau # Call logger last time befor stoping. if (is.function(logger)) { - logger(environment()) + logger(epoch, attempt, loss, V, tau) } break() } @@ -114,7 +112,7 @@ cve_simple <- function(X, Y, k, # Call logger after taking a step. if (is.function(logger)) { - logger(environment()) + logger(epoch, attempt, loss, V, tau) } # Compute gradient at new position. diff --git a/CVE_R/R/util.R b/CVE_R/R/util.R index ca7606a..d9d9efe 100644 --- a/CVE_R/R/util.R +++ b/CVE_R/R/util.R @@ -1,4 +1,4 @@ -#' Samples uniform from the Stiefel Manifold +#' Samples uniform from the Stiefl Manifold. #' #' @param p row dim. #' @param q col dim. @@ -10,6 +10,48 @@ rStiefl <- function(p, q) { return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q)))) } +#' Retraction to the manifold. +#' +#' @param A matrix. +#' @return `(p, q)` semi-orthogonal matrix, aka element of the Stiefl manifold. +#' @keywords internal +#' @export +retractStiefl <- function(A) { + return(qr.Q(qr(A))) +} + +#' Skew-Symmetric matrix computed from `A` as +#' \eqn{1/2 (A - A^T)}. +#' @param A Matrix of dim `(p, q)` +#' @return Skew-Symmetric matrix of dim `(p, p)`. +#' @keywords internal +#' @export +skew <- function(A) { + 0.5 * (A - t(A)) +} + +#' Symmetric matrix computed from `A` as +#' \eqn{1/2 (A + A^T)}. +#' @param A Matrix of dim `(p, q)` +#' @return Symmetric matrix of dim `(p, p)`. +#' @keywords internal +#' @export +sym <- function(A) { + 0.5 * (A + t(A)) +} + +#' Orthogonal Projection onto the tangent space of the stiefl manifold. +#' +#' @param V Point on the stiefl manifold. +#' @param G matrix to be projected onto the tangent space at `V`. +#' @return `(p, q)` matrix as element of the tangent space at `V`. 
+#' @keywords internal +#' @export +projTangentStiefl <- function(V, G) { + Q <- diag(1, nrow(V)) - V %*% t(V) + return(Q %*% G + V %*% skew(t(V) %*% G)) +} + #' Null space basis of given matrix `V` #' #' @param V `(p, q)` matrix diff --git a/CVE_legacy/Test_weigthed_cve.R b/CVE_legacy/Test_weigthed_cve.R new file mode 100644 index 0000000..c5fd51c --- /dev/null +++ b/CVE_legacy/Test_weigthed_cve.R @@ -0,0 +1,113 @@ + +#C:\Users\Lukas\Desktop\owncloud\Shared\Lukas\CVE +install.packages("C:/Users/Lukas/Desktop/owncloud/Shared/Lukas/CVE_1.0.tar.gz", repos=NULL, type="source") +install.packages(file.choose(), repos=NULL, type="source") + +dim<-12 +N<-100 +s<-0.5 +dat<-creat_sample(rep(1,dim)/sqrt(dim),N,fsquare,0.5) +test<-cve(Y~.,data=as.data.frame(dat),k=1) +############## +#initialize model parameterss +m<-100 #number of replications in simulation +dim<-12 #dimension of random variable X +truedim<-2 #dimension of B=b +qs<-dim-truedim # dimension of orthogonal complement of B +b1=c(1,1,1,1,1,1,0,0,0,0,0,0)/sqrt(6) +b2=c(1,-1,1,-1,1,-1,0,0,0,0,0,0)/sqrt(6) +b<-cbind(b1,b2) +#b<-b1 +P<-b%*%t(b) +sigma=0.5 #error standard deviation +N<-70 #sample size +K<-30 #number of arbitrary starting values for curvilinear optimization +MAXIT<-30 #maximal number of iterations in curvilinear search algorithm +var_vec<-mat.or.vec(m,12) +M1_weight<-mat.or.vec(m,13) +#colnames(M1_weight)<-c('CVE1','CVE2','CVE3','CVE1_Rcpp','CVE2_Rcpp','CVE3_Rcpp','meanMAVE','csMAVE','phd','sir','save','CVE4') +#link function for M1 +fM1<-function(x){return(x[1]/(0.5+(x[2]+1.5)^2))} +for (i in 1:m){ + #generate dat according to M1 + dat<-creat_sample_nor_nonstand(b,N,fsquare,diag(rep(1,dim)),sigma) + #est sample covariance matrix + Sig_est<-est_varmat(dat[,-1]) + #est trace of sample covariance matrix + tr<-var_tr(Sig_est) + + #calculates Vhat_k for CVE1,CVE2, CVE3 for k=qs + CVE1<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0) + CVE2<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0) + CVE3<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0) + + CVE4<-stiefl_weight_partial_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0) + CVE5<-stiefl_weight_partial_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0) + CVE6<-stiefl_weight_partial_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0) + # + CVE7<-stiefl_weight_full_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0) + CVE8<-stiefl_weight_full_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0) + CVE9<-stiefl_weight_full_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0) + # + # + var_vec[i,1]<-CVE1$var + var_vec[i,2]<-CVE2$var + var_vec[i,3]<-CVE3$var + + var_vec[i,4]<-CVE4$var + var_vec[i,5]<-CVE5$var + var_vec[i,6]<-CVE6$var + # + var_vec[i,7]<-CVE7$var + var_vec[i,8]<-CVE8$var + var_vec[i,9]<-CVE9$var + + CVE1$est_base<-fill_base(CVE1$est_base) + CVE2$est_base<-fill_base(CVE2$est_base) + CVE3$est_base<-fill_base(CVE3$est_base) + CVE4$est_base<-fill_base(CVE4$est_base) + CVE5$est_base<-fill_base(CVE5$est_base) + CVE6$est_base<-fill_base(CVE6$est_base) + 
CVE7$est_base<-fill_base(CVE7$est_base) + CVE8$est_base<-fill_base(CVE8$est_base) + CVE9$est_base<-fill_base(CVE9$est_base) + + # calculate distance between true B and estimated B + M1_weight[i,1]<-subspace_dist(CVE1$est_base[,1:truedim],b) + M1_weight[i,2]<-subspace_dist(CVE2$est_base[,1:truedim],b) + M1_weight[i,3]<-subspace_dist(CVE3$est_base[,1:truedim],b) + M1_weight[i,4]<-subspace_dist(CVE4$est_base[,1:truedim],b) + M1_weight[i,5]<-subspace_dist(CVE5$est_base[,1:truedim],b) + M1_weight[i,6]<-subspace_dist(CVE6$est_base[,1:truedim],b) + M1_weight[i,7]<-subspace_dist(CVE7$est_base[,1:truedim],b) + M1_weight[i,8]<-subspace_dist(CVE8$est_base[,1:truedim],b) + M1_weight[i,9]<-subspace_dist(CVE9$est_base[,1:truedim],b) + + + + CVE1_Rcpp<-cve(Y~.,data=as.data.frame(dat),k=truedim,nObs=N^0.8,attempts=K,tol=10^(-3),slack=0)[[2]] + CVE2_Rcpp<-cve(Y~.,data=as.data.frame(dat),k=truedim,nObs=N^(2/3),attempts=K,tol=10^(-3),slack=0)[[2]] + CVE3_Rcpp<-cve(Y~.,data=as.data.frame(dat),k=truedim,nObs=N^0.5,attempts=K,tol=10^(-3),slack=0)[[2]] + + # CVE4_Rcpp<-cve(Y~.,data=as.data.frame(dat),k=truedim,h=h_opt,attempts=K,tol=10^(-3))[[2]] + #M1_Rcpp[i,12]<-subspace_dist(CVE4_Rcpp$B,b) + + var_vec[i,10]<-CVE1_Rcpp$loss + var_vec[i,11]<-CVE2_Rcpp$loss + var_vec[i,12]<-CVE3_Rcpp$loss + + #calculate orthogonal complement of Vhat_k + #i.e. CVE1$est_base[,1:truedim] is estimator for B with dimension (dim times (dim-qs)) + + + M1_weight[i,10]<-subspace_dist(CVE1_Rcpp$B,b) + M1_weight[i,11]<-subspace_dist(CVE2_Rcpp$B,b) + M1_weight[i,12]<-subspace_dist(CVE3_Rcpp$B,b) + #meanMAVE + mod_t2<-mave(Y~.,data=as.data.frame(dat),method = 'meanMAVE') + M1_weight[i,13]<-subspace_dist(mod_t2$dir[[truedim]],b) + + print(paste(i,paste('/',m))) +} +boxplot(M1_weight[1:(i-1),]/sqrt(2*truedim),names=colnames(M1_weight),ylab='err',main='M1') +summary(M1_weight[1:(i-1),]) diff --git a/CVE_legacy/stiefel_weight_package.R b/CVE_legacy/stiefel_weight_package.R new file mode 100644 index 0000000..5209133 --- /dev/null +++ b/CVE_legacy/stiefel_weight_package.R @@ -0,0 +1,222 @@ +LV_weight_partial<-function(V,Xl,dtemp,h,q,Y,grad=T){ + N<-length(Y) + if(is.vector(V)){k<-1} + else{k<-length(V[1,])} + Xlv<-Xl%*%V + d<-dtemp-((Xlv^2)%*%rep(1,k)) + w<-exp(-0.5*(d/h)^2)#dnorm(d/h)/dnorm(0) + w<-matrix(w,N,q) + wn<-apply(w,2,sum)#new + w<-apply(w,2,column_normalize) + mY<-t(w)%*%Y + sig<-t(w)%*%(Y^2)-(mY)^2 + W<-diag(kronecker(t(wn),rep(1,N)))##new + if(grad==T){ + grad<-mat.or.vec(dim,k) + tmp1<-(kronecker(sig,rep(1,N))-(as.vector(kronecker(rep(1,q),Y))-kronecker(mY,rep(1,N)))^2) + if(k==1){ + grad_d<- -2*Xl*as.vector(Xlv) + grad<-(1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1 #new + # wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N)) + # grad<- wn_grad%*%(sig-rep(var1[2],q))/(sum(wn))+grad + } + else{ + for (j in 1:(k)){ + grad_d<- -2*Xl*as.vector(Xlv[,j]) + grad[,j]<- (1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1#new + # wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N)) + # grad[,j]<- wn_grad%*%(sig-rep(var1[2],q))/(sum(wn))+grad + } + } + ret<-list(t(wn)%*%sig/sum(wn),sig,grad)#new + names(ret)<-c('var','sig','grad') + } + else{ + ret<-list(t(wn)%*%sig/sum(wn),sig)#new + names(ret)<-c('var','sig') + } + + return(ret) +} +###### +LV_weight_full<-function(V,Xl,dtemp,h,q,Y,grad=T){ + N<-length(Y) + if(is.vector(V)){k<-1} + else{k<-length(V[1,])} + Xlv<-Xl%*%V + d<-dtemp-((Xlv^2)%*%rep(1,k)) + 
w<-exp(-0.5*(d/h)^2)#dnorm(d/h)/dnorm(0) + w<-matrix(w,N,q) + wn<-apply(w,2,sum)#new + w<-apply(w,2,column_normalize) + mY<-t(w)%*%Y + sig<-t(w)%*%(Y^2)-(mY)^2 + W<-(kronecker(t(wn),rep(1,N)))#new or ###diag(kronecker(t(wn),rep(1,N))) + var<-t(wn)%*%sig/sum(wn)#new + if(grad==T){ + grad<-mat.or.vec(dim,k) + tmp1<-(kronecker(sig,rep(1,N))-(as.vector(kronecker(rep(1,q),Y))-kronecker(mY,rep(1,N)))^2) + if(k==1){ + grad_d<- -2*Xl*as.vector(Xlv) + grad<-(1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1#new + wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N))#new + grad<- wn_grad%*%(sig-rep(var,q))/(sum(wn))+grad#new + } + else{ + for (j in 1:(k)){ + grad_d<- -2*Xl*as.vector(Xlv[,j]) + grad[,j]<- (1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1#new + wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N))#new + grad[,j]<- wn_grad%*%(sig-rep(var,q))/(sum(wn))+grad[,j]#new + } + } + ret<-list(var,sig,grad) + names(ret)<-c('var','sig','grad') + } + else{ + ret<-list(var,sig) + names(ret)<-c('var','sig') + } + + return(ret) +} +#### +stiefl_weight_partial_opt<-function(dat,h=NULL,k,k0=30,p=1,maxit=50,nObs=sqrt(length(dat[,1])),lambda_0=1,tol=10^(-3),sclack_para=0){ + Y<-dat[,1] + X<-dat[,-1] + N<-length(Y) + dim<-length(X[1,]) + if(p<1){ + S<-est_varmat(X) + tmp1<-q_ind(X,S,p) + q<-tmp1$q + ind<-tmp1$ind + } + else{ + q<-N + ind<-1:N + } + Xl<-(kronecker(rep(1,q),X)-kronecker(X[ind,],rep(1,N))) + dtemp<-apply(Xl,1,norm2) + if(is.null(h)){ + S<-est_varmat(X) + tr<-var_tr(S) + h<-choose_h_2(dim,k,N,nObs,tr) + } + best<-exp(10000) + Vend<-mat.or.vec(dim,k) + sig<-mat.or.vec(q,1) + for(u in 1:k0){ + Vnew<-Vold<-stiefl_startval(dim,k) + #print(Vold) + #print(LV(Vold,Xl,dtemp,h,q,Y)$var) + Lnew<-Lold<-exp(10000) + lambda<-lambda_0 + err<-10 + count<-0 + count2<-0 + while(err>tol&count sclack_para){#/(count+1)^(0.5) + lambda=lambda/2 + err<-10 + count2<-count2+1 + count<-count-1 + Vnew<-Vold #!!!!! + + } + Vold<-Vnew + count<-count+1 + #print(count) + } + if(best>Lnew){ + best<-Lnew + Vend<-Vnew + sig<-tmp3$sig + } + } + ret<-list(Vend,best,sig,count,h,count2) + names(ret)<-c('est_base','var','aov_dat','count','h','count2') + return(ret) +} +########### +stiefl_weight_full_opt<-function(dat,h=NULL,k,k0=30,p=1,maxit=50,nObs=sqrt(length(dat[,1])),lambda_0=1,tol=10^(-3),sclack_para=0){ + Y<-dat[,1] + X<-dat[,-1] + N<-length(Y) + dim<-length(X[1,]) + if(p<1){ + S<-est_varmat(X) + tmp1<-q_ind(X,S,p) + q<-tmp1$q + ind<-tmp1$ind + } + else{ + q<-N + ind<-1:N + } + Xl<-(kronecker(rep(1,q),X)-kronecker(X[ind,],rep(1,N))) + dtemp<-apply(Xl,1,norm2) + if(is.null(h)){ + S<-est_varmat(X) + tr<-var_tr(S) + h<-choose_h_2(dim,k,N,nObs,tr) + } + best<-exp(10000) + Vend<-mat.or.vec(dim,k) + sig<-mat.or.vec(q,1) + for(u in 1:k0){ + Vnew<-Vold<-stiefl_startval(dim,k) + #print(Vold) + #print(LV(Vold,Xl,dtemp,h,q,Y)$var) + Lnew<-Lold<-exp(10000) + lambda<-lambda_0 + err<-10 + count<-0 + count2<-0 + while(err>tol&count sclack_para){#/(count+1)^(0.5) + lambda=lambda/2 + err<-10 + count2<-count2+1 + count<-count-1 + Vnew<-Vold #!!!!! 
+
+    }
+    Vold<-Vnew
+    count<-count+1
+    #print(count)
+  }
+  if(best>Lnew){
+    best<-Lnew
+    Vend<-Vnew
+    sig<-tmp3$sig
+  }
+ }
+ ret<-list(Vend,best,sig,count,h,count2)
+ names(ret)<-c('est_base','var','aov_dat','count','h','count2')
+ return(ret)
+}
diff --git a/LaTeX/notes.tex b/LaTeX/notes.tex
new file mode 100644
index 0000000..5379a9f
--- /dev/null
+++ b/LaTeX/notes.tex
@@ -0,0 +1,46 @@
+\documentclass[12pt,a4paper]{article}
+
+\usepackage[utf8]{inputenc}
+\usepackage[T1]{fontenc}
+\usepackage{amsmath, amsfonts, amssymb, amsthm}
+\usepackage{fullpage}
+
+\newcommand{\vecl}{\ensuremath{\operatorname{vec}_l}}
+\newcommand{\Sym}{\ensuremath{\operatorname{Sym}}}
+
+\begin{document}
+
+Consider a matrix $A = (a_{i,j})_{i,j = 0, \ldots, n-1} \in \mathbb{R}^{n\times n}$ given as
+\begin{displaymath}
+    A = \begin{pmatrix}
+        a_{0,0} & a_{0,1} & a_{0,2} & \ldots & a_{0,n-1} \\
+        a_{1,0} & a_{1,1} & a_{1,2} & \ldots & a_{1,n-1} \\
+        a_{2,0} & a_{2,1} & a_{2,2} & \ldots & a_{2,n-1} \\
+        \vdots & \vdots & \vdots & \ddots & \vdots \\
+        a_{n-1,0} & a_{n-1,1} & a_{n-1,2} & \ldots & a_{n-1,n-1}
+    \end{pmatrix}
+\end{displaymath}
+
+A symmetric matrix with zero main diagonal, meaning a matrix $S = S^T$ with $s_{i,i} = 0,\ \forall i = 0,\ldots,n-1$, is given in the following form
+\begin{displaymath}
+    S = \begin{pmatrix}
+        0 & s_{1,0} & s_{2,0} & \ldots & s_{n-1,0} \\
+        s_{1,0} & 0 & s_{2,1} & \ldots & s_{n-1,1} \\
+        s_{2,0} & s_{2,1} & 0 & \ldots & s_{n-1,2} \\
+        \vdots & \vdots & \vdots & \ddots & \vdots \\
+        s_{n-1,0} & s_{n-1,1} & s_{n-1,2} & \ldots & 0
+    \end{pmatrix}
+\end{displaymath}
+Therefore it is sufficient to store only the lower triangular part. For memory efficiency, and for some further algorithmic shortcuts (which are sometimes more expensive), the symmetric matrix $S$ is stored in packed form, meaning in a vector of length $\frac{n(n-1)}{2}$. We use (as for matrices) a column-major order of the elements and define the operator $\vecl:\Sym(n)\to \mathbb{R}^{n(n-1) / 2}$ as
+
+\begin{displaymath}
+    \vecl(S) = (s_{1,0}, s_{2,0},\cdots,s_{n-1,0},s_{2,1},\cdots,s_{n-1,n-2})^T
+\end{displaymath}
+
+The relation between the matrix indices $i,j$ and the $\vecl$ index $k$ is given by
+
+\begin{displaymath}
+    (\vecl(S)_k = s_{i,j} \quad\Leftrightarrow\quad k = jn+i) : j \in \{0,...,n-2\} \land j < i < n.
+\end{displaymath}
+
+\end{document}
\ No newline at end of file
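As an editor's illustration of the note above (not part of the patch, plain base R), the following sketch builds $\vecl(S)$ for a small symmetric matrix with zero diagonal and checks that, with the 0-based indices used in the note, the position $k = jn + i$ picks out $s_{i,j}$ from the column-major vectorization of the full matrix:

n <- 4
S <- matrix(0, n, n)
S[lower.tri(S)] <- 1:choose(n, 2)       # fill the strictly lower triangle
S <- S + t(S)                           # symmetrize; the diagonal stays zero

vecl <- S[lower.tri(S)]                 # vec_l(S): column-major, length n(n-1)/2

ij <- which(lower.tri(S), arr.ind = TRUE) - 1L   # 0-based pairs (i, j) with j < i
i <- ij[, "row"]; j <- ij[, "col"]
# k = j * n + i addresses s_{i,j} in the column-major layout of the full matrix
# (R indexing is 1-based, hence the + 1L):
stopifnot(all(as.vector(S)[j * n + i + 1L] == vecl))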