2
0
Fork 0
This commit is contained in:
Daniel Kapla 2019-09-25 14:49:12 +02:00
parent 8db5266f8e
commit 50302ce18a
25 changed files with 1000 additions and 409 deletions

View File

@ -5,15 +5,15 @@ S3method(summary,cve)
export(cve)
export(cve.call)
export(cve.grid.search)
export(cve_linesearch)
export(cve_sgd)
export(cve_simple)
export(dataset)
export(elem.pairs)
export(estimate.bandwidth)
export(grad)
export(null)
export(projTangentStiefl)
export(rStiefl)
export(retractStiefl)
export(skew)
export(sym)
import(stats)
importFrom(graphics,lines)
importFrom(graphics,plot)

View File

@ -1,169 +0,0 @@
#' Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe
#' conditions.
#'
#' @keywords internal
#' @export
cve_linesearch <- function(X, Y, k,
nObs = sqrt(nrow(X)),
h = NULL,
tau = 1.0,
tol = 1e-3,
rho1 = 0.1,
rho2 = 0.9,
slack = 0,
epochs = 50L,
attempts = 10L,
max.linesearch.iter = 10L,
logger = NULL
) {
# Set `grad` functions environment to enable if to find this environments
# local variabels, needed to enable the manipulation of this local variables
# from within `grad`.
environment(grad) <- environment()
# Get dimensions.
n <- nrow(X)
p <- ncol(X)
q <- p - k
# Save initial learning rate `tau`.
tau.init <- tau
# Addapt tolearance for break condition.
tol <- sqrt(2 * q) * tol
# Estaimate bandwidth if not given.
if (missing(h) | !is.numeric(h)) {
h <- estimate.bandwidth(X, k, nObs)
}
# Compute persistent data.
# Compute lookup indexes for symmetrie, lower/upper
# triangular parts and vectorization.
pair.index <- elem.pairs(seq(n))
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
# Matrix of vectorized indices. (vec(index) -> seq)
index <- matrix(seq(n * n), n, n)
lower <- index[lower.tri(index)]
upper <- t(index)[lower]
# Create all pairewise differences of rows of `X`.
X_diff <- X[i, , drop = F] - X[j, , drop = F]
# Identity matrix.
I_p <- diag(1, p)
# Init tracking of current best (according multiple attempts).
V.best <- NULL
loss.best <- Inf
# Start loop for multiple attempts.
for (attempt in 1:attempts) {
# Sample a `(p, q)` dimensional matrix from the stiefel manifold as
# optimization start value.
V <- rStiefl(p, q)
# Initial loss and gradient.
loss <- Inf
G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
# Set last loss (aka, loss after applying the step).
loss.last <- loss
# Call logger with initial values before starting optimization.
if (is.function(logger)) {
epoch <- 0 # Set epoch count to 0 (only relevant for logging).
error <- NA
logger(environment())
}
## Start optimization loop.
for (epoch in 1:epochs) {
# Cayley transform matrix `A`
A <- (G %*% t(V)) - (V %*% t(G))
# Directional derivative of the loss at current position, given
# as `Tr(G^T \cdot A \cdot V)`.
loss.prime <- -0.5 * norm(A, type = 'F')^2
# Linesearch
tau.upper <- Inf
tau.lower <- 0
tau <- tau.init
for (iter in 1:max.linesearch.iter) {
# Apply learning rate `tau`.
A.tau <- (tau / 2) * A
# Parallet transport (on Stiefl manifold) into direction of `G`.
inv <- solve(I_p + A.tau)
V.tau <- inv %*% ((I_p - A.tau) %*% V)
# Loss at position after a step.
loss <- Inf # aka loss.tau
G.tau <- grad(X, Y, V.tau, h, loss.out = TRUE, persistent = TRUE)
# Armijo condition.
if (loss > loss.last + (rho1 * tau * loss.prime)) {
tau.upper <- tau
tau <- (tau.lower + tau.upper) / 2
next()
}
V.prime.tau <- -0.5 * inv %*% A %*% (V + V.tau)
loss.prime.tau <- sum(G * V.prime.tau) # Tr(grad(tau)^T \cdot Y^'(tau))
# Wolfe condition.
if (loss.prime.tau < rho2 * loss.prime) {
tau.lower <- tau
if (tau.upper == Inf) {
tau <- 2 * tau.lower
} else {
tau <- (tau.lower + tau.upper) / 2
}
} else {
break()
}
}
# Compute error.
error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
# Check break condition (epoch check to skip ignored gradient calc).
# Note: the devision by `sqrt(2 * k)` is included in `tol`.
if (error < tol | epoch >= epochs) {
# take last step and stop optimization.
V <- V.tau
# Final call to the logger before stopping optimization
if (is.function(logger)) {
G <- G.tau
logger(environment())
}
break()
}
# Perform the step and remember previous loss.
V <- V.tau
loss.last <- loss
G <- G.tau
# Log after taking current step.
if (is.function(logger)) {
logger(environment())
}
}
# Check if current attempt improved previous ones
if (loss < loss.best) {
loss.best <- loss
V.best <- V
}
}
return(list(
loss = loss.best,
V = V.best,
B = null(V.best),
h = h
))
}

View File

@ -1,48 +0,0 @@
#' Compute get gradient of `L(V)` given a dataset `X`.
#'
#' @param X Data matrix.
#' @param Y Responce.
#' @param V Position to compute the gradient at, aka point on Stiefl manifold.
#' @param h Bandwidth
#' @param loss.out Iff \code{TRUE} loss will be written to parent environment.
#' @param loss.only Boolean to only compute the loss, of \code{TRUE} a single
#' value loss is returned and \code{envir} is ignored.
#' @param persistent Determines if data indices and dependent calculations shall
#' be reused from the parent environment. ATTENTION: Do NOT set this flag, only
#' intended for internal usage by carefully aligned functions!
#' @keywords internal
#' @export
grad <- function(X, Y, V, h,
loss.out = FALSE,
loss.only = FALSE,
persistent = FALSE) {
# Get number of samples and dimension.
n <- nrow(X)
p <- ncol(X)
if (!persistent) {
# Compute lookup indexes for symmetrie, lower/upper
# triangular parts and vectorization.
pair.index <- elem.pairs(seq(n))
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
# Index of vectorized matrix, for lower and upper triangular part.
lower <- ((i - 1) * n) + j
upper <- ((j - 1) * n) + i
# Create all pairewise differences of rows of `X`.
X_diff <- X[i, , drop = F] - X[j, , drop = F]
}
out <- .Call("grad_c", PACKAGE = "CVE",
X, X_diff, as.double(Y), V, as.double(h));
if (loss.only) {
return(out$loss)
}
if (loss.out) {
loss <<- out$loss
}
return(out$G)
}

View File

@ -1,4 +1,4 @@
#' Samples uniform from the Stiefel Manifold
#' Samples uniform from the Stiefl Manifold.
#'
#' @param p row dim.
#' @param q col dim.
@ -10,6 +10,48 @@ rStiefl <- function(p, q) {
return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))))
}
#' Retraction to the manifold.
#'
#' @param A matrix.
#' @return `(p, q)` semi-orthogonal matrix, aka element of the Stiefl manifold.
#' @keywords internal
#' @export
retractStiefl <- function(A) {
return(qr.Q(qr(A)))
}
#' Skew-Symmetric matrix computed from `A` as
#' \eqn{1/2 (A - A^T)}.
#' @param A Matrix of dim `(p, q)`
#' @return Skew-Symmetric matrix of dim `(p, p)`.
#' @keywords internal
#' @export
skew <- function(A) {
0.5 * (A - t(A))
}
#' Symmetric matrix computed from `A` as
#' \eqn{1/2 (A + A^T)}.
#' @param A Matrix of dim `(p, q)`
#' @return Symmetric matrix of dim `(p, p)`.
#' @keywords internal
#' @export
sym <- function(A) {
0.5 * (A + t(A))
}
#' Orthogonal Projection onto the tangent space of the stiefl manifold.
#'
#' @param V Point on the stiefl manifold.
#' @param G matrix to be projected onto the tangent space at `V`.
#' @return `(p, q)` matrix as element of the tangent space at `V`.
#' @keywords internal
#' @export
projTangentStiefl <- function(V, G) {
Q <- diag(1, nrow(V)) - V %*% t(V)
return(Q %*% G + V %*% skew(t(V) %*% G))
}
#' Null space basis of given matrix `V`
#'
#' @param V `(p, q)` matrix
@ -19,7 +61,7 @@ rStiefl <- function(p, q) {
null <- function(V) {
tmp <- qr(V)
set <- if(tmp$rank == 0L) seq_len(ncol(V)) else -seq_len(tmp$rank)
return(qr.Q(tmp, complete=TRUE)[, set, drop=FALSE])
return(qr.Q(tmp, complete = TRUE)[, set, drop = FALSE])
}
#' Creates a (numeric) matrix where each column contains

View File

@ -5,10 +5,11 @@
\alias{cve.call}
\title{Implementation of the CVE method.}
\usage{
cve(formula, data, method = "simple", max.dim = 10, ...)
cve(formula, data, method = "simple", max.dim = 10L, ...)
cve.call(X, Y, method = "simple", nObs = nrow(X)^0.5, min.dim = 1,
max.dim = 10, k, ...)
cve.call(X, Y, method = "simple", nObs = sqrt(nrow(X)), h = NULL,
min.dim = 1L, max.dim = 10L, k = NULL, tau = 1, tol = 0.001,
epochs = 50L, attempts = 10L, logger = NULL)
}
\arguments{
\item{formula}{Formel for the regression model defining `X`, `Y`.

View File

@ -1,16 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cve_linesearch.R
\name{cve_linesearch}
\alias{cve_linesearch}
\title{Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe
conditions.}
\usage{
cve_linesearch(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 1,
tol = 0.001, rho1 = 0.1, rho2 = 0.9, slack = 0, epochs = 50L,
attempts = 10L, max.linesearch.iter = 10L, logger = NULL)
}
\description{
Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe
conditions.
}
\keyword{internal}

View File

@ -1,16 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cve_sgd.R
\name{cve_sgd}
\alias{cve_sgd}
\title{Simple implementation of the CVE method. 'Simple' means that this method is
a classic GD method unsing no further tricks.}
\usage{
cve_sgd(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 0.01,
tol = 0.001, epochs = 50L, batch.size = 16L, attempts = 10L,
logger = NULL)
}
\description{
Simple implementation of the CVE method. 'Simple' means that this method is
a classic GD method unsing no further tricks.
}
\keyword{internal}

View File

@ -1,16 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cve_simple.R
\name{cve_simple}
\alias{cve_simple}
\title{Simple implementation of the CVE method. 'Simple' means that this method is
a classic GD method unsing no further tricks.}
\usage{
cve_simple(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 1,
tol = 0.001, slack = 0, epochs = 50L, attempts = 10L,
logger = NULL)
}
\description{
Simple implementation of the CVE method. 'Simple' means that this method is
a classic GD method unsing no further tricks.
}
\keyword{internal}

View File

@ -1,31 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/gradient.R
\name{grad}
\alias{grad}
\title{Compute get gradient of `L(V)` given a dataset `X`.}
\usage{
grad(X, Y, V, h, loss.out = FALSE, loss.only = FALSE,
persistent = FALSE)
}
\arguments{
\item{X}{Data matrix.}
\item{Y}{Responce.}
\item{V}{Position to compute the gradient at, aka point on Stiefl manifold.}
\item{h}{Bandwidth}
\item{loss.out}{Iff \code{TRUE} loss will be written to parent environment.}
\item{loss.only}{Boolean to only compute the loss, of \code{TRUE} a single
value loss is returned and \code{envir} is ignored.}
\item{persistent}{Determines if data indices and dependent calculations shall
be reused from the parent environment. ATTENTION: Do NOT set this flag, only
intended for internal usage by carefully aligned functions!}
}
\description{
Compute get gradient of `L(V)` given a dataset `X`.
}
\keyword{internal}

View File

@ -0,0 +1,20 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/util.R
\name{projTangentStiefl}
\alias{projTangentStiefl}
\title{Orthogonal Projection onto the tangent space of the stiefl manifold.}
\usage{
projTangentStiefl(V, G)
}
\arguments{
\item{V}{Point on the stiefl manifold.}
\item{G}{matrix to be projected onto the tangent space at `V`.}
}
\value{
`(p, q)` matrix as element of the tangent space at `V`.
}
\description{
Orthogonal Projection onto the tangent space of the stiefl manifold.
}
\keyword{internal}

View File

@ -2,7 +2,7 @@
% Please edit documentation in R/util.R
\name{rStiefl}
\alias{rStiefl}
\title{Samples uniform from the Stiefel Manifold}
\title{Samples uniform from the Stiefl Manifold.}
\usage{
rStiefl(p, q)
}
@ -15,7 +15,7 @@ rStiefl(p, q)
`(p, q)` semi-orthogonal matrix
}
\description{
Samples uniform from the Stiefel Manifold
Samples uniform from the Stiefl Manifold.
}
\examples{
V <- rStiefel(6, 4)

View File

@ -0,0 +1,18 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/util.R
\name{retractStiefl}
\alias{retractStiefl}
\title{Retraction to the manifold.}
\usage{
retractStiefl(A)
}
\arguments{
\item{A}{matrix.}
}
\value{
`(p, q)` semi-orthogonal matrix, aka element of the Stiefl manifold.
}
\description{
Retraction to the manifold.
}
\keyword{internal}

20
CVE_C/man/skew.Rd Normal file
View File

@ -0,0 +1,20 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/util.R
\name{skew}
\alias{skew}
\title{Skew-Symmetric matrix computed from `A` as
\eqn{1/2 (A - A^T)}.}
\usage{
skew(A)
}
\arguments{
\item{A}{Matrix of dim `(p, q)`}
}
\value{
Skew-Symmetric matrix of dim `(p, p)`.
}
\description{
Skew-Symmetric matrix computed from `A` as
\eqn{1/2 (A - A^T)}.
}
\keyword{internal}

20
CVE_C/man/sym.Rd Normal file
View File

@ -0,0 +1,20 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/util.R
\name{sym}
\alias{sym}
\title{Symmetric matrix computed from `A` as
\eqn{1/2 (A + A^T)}.}
\usage{
sym(A)
}
\arguments{
\item{A}{Matrix of dim `(p, q)`}
}
\value{
Symmetric matrix of dim `(p, p)`.
}
\description{
Symmetric matrix computed from `A` as
\eqn{1/2 (A + A^T)}.
}
\keyword{internal}

View File

@ -12,7 +12,7 @@ void cve_simple_sub(const int n, const int p, const int q,
double *V, double *L,
SEXP logger, SEXP loggerEnv) {
int attempt, epoch, i, j, k, nn = (n * (n - 1)) / 2;
int attempt, epoch, i, nn = (n * (n - 1)) / 2;
double loss, loss_last, loss_best, err, tau;
double tol = tol_init * sqrt((double)(2 * q));
double gKscale = -0.5 / h;

View File

@ -10,6 +10,7 @@
#'
#' @docType package
#' @author Loki
#' @useDynLib CVE, .registration = TRUE
"_PACKAGE"
#' Implementation of the CVE method.
@ -54,21 +55,21 @@
#' @import stats
#' @importFrom stats model.frame
#' @export
cve <- function(formula, data, method = "simple", max.dim = 10, ...) {
cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
# check for type of `data` if supplied and set default
if (missing(data)) {
data <- environment(formula)
} else if (!is.data.frame(data)) {
stop('Parameter `data` must be a `data.frame` or missing.')
stop("Parameter 'data' must be a 'data.frame' or missing.")
}
# extract `X`, `Y` from `formula` with `data`
model <- stats::model.frame(formula, data)
X <- as.matrix(model[,-1, drop = FALSE])
Y <- as.matrix(model[, 1, drop = FALSE])
X <- as.matrix(model[ ,-1L, drop = FALSE])
Y <- as.double(model[ , 1L])
# pass extracted data on to [cve.call()]
dr <- cve.call(X, Y, method = method, ...)
dr <- cve.call(X, Y, method = method, max.dim = max.dim, ...)
# overwrite `call` property from [cve.call()]
dr$call <- match.call()
@ -83,35 +84,81 @@ cve <- function(formula, data, method = "simple", max.dim = 10, ...) {
#' @param ... Method specific parameters.
#' @rdname cve
#' @export
cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5,
min.dim = 1, max.dim = 10, k, ...) {
cve.call <- function(X, Y, method = "simple",
nObs = sqrt(nrow(X)), h = NULL,
min.dim = 1L, max.dim = 10L, k = NULL,
tau = 1.0, tol = 1e-3,
epochs = 50L, attempts = 10L,
logger = NULL) {
# parameter checking
if (!is.matrix(X)) {
stop('X should be a matrices.')
if (!(is.matrix(X) && is.numeric(X))) {
stop("Parameter 'X' should be a numeric matrices.")
}
if (is.matrix(Y)) {
Y <- as.vector(Y)
if (!is.numeric(Y)) {
stop("Parameter 'Y' must be numeric.")
}
if (is.matrix(Y) || !is.double(Y)) {
Y <- as.double(Y)
}
if (nrow(X) != length(Y)) {
stop('Rows of X and number of Y elements are not compatible.')
stop("Rows of 'X' and 'Y' elements are not compatible.")
}
if (ncol(X) < 2) {
stop('X is one dimensional, no need for dimension reduction.')
stop("'X' is one dimensional, no need for dimension reduction.")
}
if (!missing(k)) {
min.dim <- as.integer(k)
max.dim <- as.integer(k)
} else {
if (missing(k) || is.null(k)) {
min.dim <- as.integer(min.dim)
max.dim <- as.integer(min(max.dim, ncol(X) - 1L))
} else {
min.dim <- as.integer(k)
max.dim <- as.integer(k)
}
if (min.dim > max.dim) {
stop('`min.dim` bigger `max.dim`.')
stop("'min.dim' bigger 'max.dim'.")
}
if (max.dim >= ncol(X)) {
stop('`max.dim` must be smaller than `ncol(X)`.')
stop("'max.dim' (or 'k') must be smaller than 'ncol(X)'.")
}
if (is.function(h)) {
estimate.bandwidth <- h
h <- NULL
}
if (!is.numeric(tau) || length(tau) > 1L || tau <= 0.0) {
stop("Initial step-width 'tau' must be positive number.")
} else {
tau <- as.double(tau)
}
if (!is.numeric(tol) || length(tol) > 1L || tol < 0.0) {
stop("Break condition tolerance 'tol' must be not negative number.")
} else {
tol <- as.double(tol)
}
if (!is.numeric(epochs) || length(epochs) > 1L) {
stop("Parameter 'epochs' must be positive integer.")
} else if (!is.integer(epochs)) {
epochs <- as.integer(epochs)
}
if (epochs < 1L) {
stop("Parameter 'epochs' must be at least 1L.")
}
if (!is.numeric(attempts) || length(attempts) > 1L) {
stop("Parameter 'attempts' must be positive integer.")
} else if (!is.integer(attempts)) {
attempts <- as.integer(attempts)
}
if (attempts < 1L) {
stop("Parameter 'attempts' must be at least 1L.")
}
if (is.function(logger)) {
loggerEnv <- environment(logger)
} else {
loggerEnv <- NULL
}
# Call specified method.
@ -119,15 +166,40 @@ cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5,
call <- match.call()
dr <- list()
for (k in min.dim:max.dim) {
if (missing(h) || is.null(h)) {
h <- estimate.bandwidth(X, k, nObs)
} else if (is.numeric(h) && h > 0.0) {
h <- as.double(h)
} else {
stop("Bandwidth 'h' must be positive numeric.")
}
if (method == 'simple') {
dr.k <- cve_simple(X, Y, k, nObs = nObs, ...)
} else if (method == 'linesearch') {
dr.k <- cve_linesearch(X, Y, k, nObs = nObs, ...)
} else if (method == 'sgd') {
dr.k <- cve_sgd(X, Y, k, nObs = nObs, ...)
dr.k <- .Call('cve_simple', PACKAGE = 'CVE',
X, Y, k, h,
tau, tol,
epochs, attempts,
logger, loggerEnv)
# dr.k <- cve_simple(X, Y, k, nObs = nObs, ...)
# } else if (method == 'linesearch') {
# dr.k <- cve_linesearch(X, Y, k, nObs = nObs, ...)
# } else if (method == 'rcg') {
# dr.k <- cve_rcg(X, Y, k, nObs = nObs, ...)
# } else if (method == 'momentum') {
# dr.k <- cve_momentum(X, Y, k, nObs = nObs, ...)
# } else if (method == 'rmsprob') {
# dr.k <- cve_rmsprob(X, Y, k, nObs = nObs, ...)
# } else if (method == 'sgdrmsprob') {
# dr.k <- cve_sgdrmsprob(X, Y, k, nObs = nObs, ...)
# } else if (method == 'sgd') {
# dr.k <- cve_sgd(X, Y, k, nObs = nObs, ...)
} else {
stop('Got unknown method.')
}
dr.k$B <- null(dr.k$V)
dr.k$loss <- mean(dr.k$L)
dr.k$h <- h
dr.k$k <- k
class(dr.k) <- "cve.k"
dr[[k]] <- dr.k
@ -140,11 +212,6 @@ cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5,
return(dr)
}
# TODO: write summary
# summary.cve <- function() {
# # code #
# }
#' Ploting helper for objects of class \code{cve}.
#'
#' @param x Object of class \code{cve} (result of [cve()]).
@ -164,43 +231,19 @@ cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5,
#' @method plot cve
#' @export
plot.cve <- function(x, ...) {
# H <- x$history
# H_1 <- H[!is.na(H[, 1]), 1]
# defaults <- list(
# main = "History",
# xlab = "Iterations i",
# ylab = expression(loss == L[n](V^{(i)})),
# xlim = c(1, nrow(H)),
# ylim = c(0, max(H)),
# type = "l"
# )
# call.plot <- match.call()
# keys <- names(defaults)
# keys <- keys[match(keys, names(call.plot)[-1], nomatch = 0) == 0]
# for (key in keys) {
# call.plot[[key]] <- defaults[[key]]
# }
# call.plot[[1L]] <- quote(plot)
# call.plot$x <- quote(1:length(H_1))
# call.plot$y <- quote(H_1)
# eval(call.plot)
# if (ncol(H) > 1) {
# for (i in 2:ncol(H)) {
# H_i <- H[H[, i] > 0, i]
# lines(1:length(H_i), H_i)
# }
# }
# x.ends <- apply(H, 2, function(h) { length(h[!is.na(h)]) })
# y.ends <- apply(H, 2, function(h) { tail(h[!is.na(h)], n=1) })
# points(x.ends, y.ends)
L <- c()
k <- c()
for (dr.k in x) {
if (class(dr.k) == 'cve.k') {
k <- c(k, paste0(dr.k$k))
L <- c(L, dr.k$L)
}
}
L <- matrix(L, ncol = length(k))
boxplot(L, main = "Loss ...",
xlab = "SDR dimension k",
ylab = expression(L(V, X[i])),
names = k)
}
#' Prints a summary of a \code{cve} result.

View File

@ -1,17 +1,19 @@
#' Simple implementation of the CVE method. 'Simple' means that this method is
#' a classic GD method unsing no further tricks.
#' Implementation of the CVE method as a Riemann Conjugated Gradient method.
#'
#' @references A Riemannian Conjugate Gradient Algorithm with Implicit Vector
#' Transport for Optimization on the Stiefel Manifold
#' @keywords internal
#' @export
cve_simple <- function(X, Y, k,
nObs = sqrt(nrow(X)),
h = NULL,
tau = 1.0,
tol = 1e-3,
slack = 0,
epochs = 50L,
attempts = 10L,
logger = NULL
cve_momentum <- function(X, Y, k,
nObs = sqrt(nrow(X)),
h = NULL,
tau = 1.0,
tol = 1e-4,
rho = 0.1, # Momentum update.
slack = 0,
epochs = 50L,
attempts = 10L,
logger = NULL
) {
# Set `grad` functions environment to enable if to find this environments
# local variabels, needed to enable the manipulation of this local variables
@ -67,9 +69,6 @@ cve_simple <- function(X, Y, k,
# Set last loss (aka, loss after applying the step).
loss.last <- loss
# Cayley transform matrix `A`
A <- (G %*% t(V)) - (V %*% t(G))
# Call logger with initial values before starting optimization.
if (is.function(logger)) {
epoch <- 0 # Set epoch count to 0 (only relevant for logging).
@ -77,12 +76,15 @@ cve_simple <- function(X, Y, k,
logger(environment())
}
M <- matrix(0, p, q)
## Start optimization loop.
for (epoch in 1:epochs) {
# Apply learning rate `tau`.
A.tau <- tau * A
A <- projTangentStiefl(V, G)
# Momentum update.
M <- A + rho * projTangentStiefl(V, M)
# Parallet transport (on Stiefl manifold) into direction of `G`.
V.tau <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)
V.tau <- retractStiefl(V - tau * M)
# Loss at position after a step.
loss <- grad(X, Y, V.tau, h, loss.only = TRUE, persistent = TRUE)
@ -119,9 +121,6 @@ cve_simple <- function(X, Y, k,
# Compute gradient at new position.
G <- grad(X, Y, V, h, persistent = TRUE)
# Cayley transform matrix `A`
A <- (G %*% t(V)) - (V %*% t(G))
}
# Check if current attempt improved previous ones
@ -129,7 +128,6 @@ cve_simple <- function(X, Y, k,
loss.best <- loss
V.best <- V
}
}
return(list(

179
CVE_R/R/cve_rcg.R Normal file
View File

@ -0,0 +1,179 @@
#' Implementation of the CVE method as a Riemann Conjugated Gradient method.
#'
#' @references A Riemannian Conjugate Gradient Algorithm with Implicit Vector
#' Transport for Optimization on the Stiefel Manifold
#' @keywords internal
#' @export
cve_rcg <- function(X, Y, k,
nObs = sqrt(nrow(X)),
h = NULL,
tau = 1.0,
tol = 1e-4,
rho = 1e-4, # For Armijo condition.
slack = 0,
epochs = 50L,
attempts = 10L,
max.linesearch.iter = 20L,
logger = NULL
) {
# Set `grad` functions environment to enable if to find this environments
# local variabels, needed to enable the manipulation of this local variables
# from within `grad`.
environment(grad) <- environment()
# Get dimensions.
n <- nrow(X) # Number of samples.
p <- ncol(X) # Data dimensions
q <- p - k # Complement dimension of the SDR space.
# Save initial learning rate `tau`.
tau.init <- tau
# Addapt tolearance for break condition.
tol <- sqrt(2 * q) * tol
# Estaimate bandwidth if not given.
if (missing(h) || !is.numeric(h)) {
h <- estimate.bandwidth(X, k, nObs)
}
# Compute persistent data.
# Compute lookup indexes for symmetrie, lower/upper
# triangular parts and vectorization.
pair.index <- elem.pairs(seq(n))
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
# Index of vectorized matrix, for lower and upper triangular part.
lower <- ((i - 1) * n) + j
upper <- ((j - 1) * n) + i
# Create all pairewise differences of rows of `X`.
X_diff <- X[i, , drop = F] - X[j, , drop = F]
# Identity matrix.
I_p <- diag(1, p)
# Init tracking of current best (according multiple attempts).
V.best <- NULL
loss.best <- Inf
# Start loop for multiple attempts.
for (attempt in 1:attempts) {
# Reset learning rate `tau`.
tau <- tau.init
# Sample a `(p, q)` dimensional matrix from the stiefel manifold as
# optimization start value.
V <- rStiefl(p, q)
# Initial loss and gradient.
loss <- Inf
G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
# Set last loss (aka, loss after applying the step).
loss.last <- loss
# Cayley transform matrix `A`
A <- (G %*% t(V)) - (V %*% t(G))
A.last <- A
W <- -A
Z <- W %*% V
# Compute directional derivative.
loss.prime <- sum(G * Z) # Tr(G^T Z)
# Call logger with initial values before starting optimization.
if (is.function(logger)) {
epoch <- 0 # Set epoch count to 0 (only relevant for logging).
error <- NA
logger(environment())
}
## Start optimization loop.
for (epoch in 1:epochs) {
# New directional derivative.
loss.prime <- sum(G * Z)
# Reset `tau` for step-size selection.
tau <- tau.init
for (iter in 1:max.linesearch.iter) {
V.tau <- retractStiefl(V + tau * Z)
# Loss at position after a step.
loss <- grad(X, Y, V.tau, h,
loss.only = TRUE, persistent = TRUE)
# Check Armijo condition.
if (loss <= loss.last + (rho * tau * loss.prime)) {
break() # Iff fulfilled stop linesearch.
}
# Reduce step-size and continue linesearch.
tau <- tau / 2
}
# Compute error.
error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
# Perform step with found step-size
V <- V.tau
loss.last <- loss
# Call logger.
if (is.function(logger)) {
logger(environment())
}
# Check break condition.
# Note: the devision by `sqrt(2 * k)` is included in `tol`.
if (error < tol) {
break()
}
# Compute Gradient at new position.
G <- grad(X, Y, V, h, persistent = TRUE)
# Store last `A` for `beta` computation.
A.last <- A
# Cayley transform matrix `A`
A <- (G %*% t(V)) - (V %*% t(G))
# Check 2. break condition.
if (norm(A, type = 'F') < tol) {
break()
}
# New directional derivative.
loss.prime <- sum(G * Z)
# Reset beta if needed.
if (loss.prime < 0) {
# Compute `beta` as describet in paper.
beta.FR <- (norm(A, type = 'F') / norm(A.last, type = 'F'))^2
beta.PR <- sum(A * (A - A.last)) / norm(A.last, type = 'F')^2
if (beta.PR < -beta.FR) {
beta <- -beta.FR
} else if (abs(beta.PR) < beta.FR) {
beta <- beta.PR
} else if (beta.PR > beta.FR) {
beta <- beta.FR
} else {
beta <- 0
}
} else {
beta <- 0
}
# Update direction.
W <- -A + beta * W
Z <- W %*% V
}
# Check if current attempt improved previous ones
if (loss < loss.best) {
loss.best <- loss
V.best <- V
}
}
return(list(
loss = loss.best,
V = V.best,
B = null(V.best),
h = h
))
}

121
CVE_R/R/cve_rmsprob.R Normal file
View File

@ -0,0 +1,121 @@
#' Implementation of the CVE method as a Riemann Conjugated Gradient method.
#'
#' @references A Riemannian Conjugate Gradient Algorithm with Implicit Vector
#' Transport for Optimization on the Stiefel Manifold
#' @keywords internal
#' @export
cve_rmsprob <- function(X, Y, k,
nObs = sqrt(nrow(X)),
h = NULL,
tau = 0.1,
tol = 1e-4,
rho = 0.1, # Momentum update.
slack = 0,
epochs = 50L,
attempts = 10L,
epsilon = 1e-7,
max.linesearch.iter = 20L,
logger = NULL
) {
# Set `grad` functions environment to enable if to find this environments
# local variabels, needed to enable the manipulation of this local variables
# from within `grad`.
environment(grad) <- environment()
# Get dimensions.
n <- nrow(X) # Number of samples.
p <- ncol(X) # Data dimensions
q <- p - k # Complement dimension of the SDR space.
# Save initial learning rate `tau`.
tau.init <- tau
# Addapt tolearance for break condition.
tol <- sqrt(2 * q) * tol
# Estaimate bandwidth if not given.
if (missing(h) || !is.numeric(h)) {
h <- estimate.bandwidth(X, k, nObs)
}
# Compute persistent data.
# Compute lookup indexes for symmetrie, lower/upper
# triangular parts and vectorization.
pair.index <- elem.pairs(seq(n))
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
# Index of vectorized matrix, for lower and upper triangular part.
lower <- ((i - 1) * n) + j
upper <- ((j - 1) * n) + i
# Create all pairewise differences of rows of `X`.
X_diff <- X[i, , drop = F] - X[j, , drop = F]
# Identity matrix.
I_p <- diag(1, p)
# Init tracking of current best (according multiple attempts).
V.best <- NULL
loss.best <- Inf
# Start loop for multiple attempts.
for (attempt in 1:attempts) {
# Sample a `(p, q)` dimensional matrix from the stiefel manifold as
# optimization start value.
V <- rStiefl(p, q)
# Call logger with initial values before starting optimization.
if (is.function(logger)) {
loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
epoch <- 0 # Set epoch count to 0 (only relevant for logging).
error <- NA
logger(environment())
}
M <- matrix(0, p, q)
## Start optimization loop.
for (epoch in 1:epochs) {
# Compute gradient and loss at current position.
loss <- Inf
G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
# Projectd Gradient.
A <- projTangentStiefl(V, G)
# Projected element squared gradient.
Asq <- projTangentStiefl(V, G * G)
# Momentum update.
M <- (1 - rho) * Asq + rho * projTangentStiefl(V, M)
# Parallet transport (on Stiefl manifold) into direction of `G`.
V.tau <- retractStiefl(V - tau.init * A / (sqrt(abs(M)) + epsilon))
# Compute error.
error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
# Perform step.
V <- V.tau
# Call logger after taking a step.
if (is.function(logger)) {
# Set tau to an step size estimate (only for logging)
tau <- tau.init / mean(sqrt(abs(M)) + epsilon)
logger(environment())
}
# Check break condition.
# Note: the devision by `sqrt(2 * k)` is included in `tol`.
if (error < tol) {
break()
}
}
# Check if current attempt improved previous ones
if (loss < loss.best) {
loss.best <- loss
V.best <- V
}
}
return(list(
loss = loss.best,
V = V.best,
B = null(V.best),
h = h
))
}

View File

@ -3,14 +3,16 @@
#'
#' @keywords internal
#' @export
cve_sgd <- function(X, Y, k,
cve_sgdrmsprob <- function(X, Y, k,
nObs = sqrt(nrow(X)),
h = NULL,
tau = 0.01,
tol = 1e-3,
tau = 0.1,
tol = 1e-4,
rho = 0.1,
epochs = 50L,
batch.size = 16L,
attempts = 10L,
epsilon = 1e-7,
logger = NULL
) {
# Set `grad` functions environment to enable if to find this environments
@ -72,6 +74,7 @@ cve_sgd <- function(X, Y, k,
logger(environment())
}
M <- matrix(0, p, q)
# Repeat `epochs` times
for (epoch in 1:epochs) {
# Shuffle batches
@ -87,13 +90,14 @@ cve_sgd <- function(X, Y, k,
loss <- NULL
G <- grad(X[batch, ], Y[batch], V, h, loss.out = TRUE)
# Cayley transform matrix.
A <- (G %*% t(V)) - (V %*% t(G))
# Apply learning rate `tau`.
A.tau <- tau * A
# Projectd Gradient.
A <- projTangentStiefl(V, G)
# Projected element squared gradient.
Asq <- projTangentStiefl(V, G * G)
# Momentum update.
M <- (1 - rho) * Asq + rho * projTangentStiefl(V, M)
# Parallet transport (on Stiefl manifold) into direction of `G`.
V <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)
V <- retractStiefl(V - tau.init * A / (sqrt(abs(M)) + epsilon))
}
# And the error for the history.
error <- norm(V.last %*% t(V.last) - V %*% t(V), type = "F")

View File

@ -72,9 +72,7 @@ cve_simple <- function(X, Y, k,
# Call logger with initial values before starting optimization.
if (is.function(logger)) {
epoch <- 0 # Set epoch count to 0 (only relevant for logging).
error <- NA
logger(environment())
logger(0L, attempt, loss, V, tau)
}
## Start optimization loop.
@ -103,7 +101,7 @@ cve_simple <- function(X, Y, k,
V <- V.tau
# Call logger last time befor stoping.
if (is.function(logger)) {
logger(environment())
logger(epoch, attempt, loss, V, tau)
}
break()
}
@ -114,7 +112,7 @@ cve_simple <- function(X, Y, k,
# Call logger after taking a step.
if (is.function(logger)) {
logger(environment())
logger(epoch, attempt, loss, V, tau)
}
# Compute gradient at new position.

View File

@ -1,4 +1,4 @@
#' Samples uniform from the Stiefel Manifold
#' Samples uniform from the Stiefl Manifold.
#'
#' @param p row dim.
#' @param q col dim.
@ -10,6 +10,48 @@ rStiefl <- function(p, q) {
return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))))
}
#' Retraction to the manifold.
#'
#' @param A matrix.
#' @return `(p, q)` semi-orthogonal matrix, aka element of the Stiefl manifold.
#' @keywords internal
#' @export
retractStiefl <- function(A) {
return(qr.Q(qr(A)))
}
#' Skew-Symmetric matrix computed from `A` as
#' \eqn{1/2 (A - A^T)}.
#' @param A Matrix of dim `(p, q)`
#' @return Skew-Symmetric matrix of dim `(p, p)`.
#' @keywords internal
#' @export
skew <- function(A) {
0.5 * (A - t(A))
}
#' Symmetric matrix computed from `A` as
#' \eqn{1/2 (A + A^T)}.
#' @param A Matrix of dim `(p, q)`
#' @return Symmetric matrix of dim `(p, p)`.
#' @keywords internal
#' @export
sym <- function(A) {
0.5 * (A + t(A))
}
#' Orthogonal Projection onto the tangent space of the stiefl manifold.
#'
#' @param V Point on the stiefl manifold.
#' @param G matrix to be projected onto the tangent space at `V`.
#' @return `(p, q)` matrix as element of the tangent space at `V`.
#' @keywords internal
#' @export
projTangentStiefl <- function(V, G) {
Q <- diag(1, nrow(V)) - V %*% t(V)
return(Q %*% G + V %*% skew(t(V) %*% G))
}
#' Null space basis of given matrix `V`
#'
#' @param V `(p, q)` matrix

View File

@ -0,0 +1,113 @@
#C:\Users\Lukas\Desktop\owncloud\Shared\Lukas\CVE
install.packages("C:/Users/Lukas/Desktop/owncloud/Shared/Lukas/CVE_1.0.tar.gz", repos=NULL, type="source")
install.packages(file.choose(), repos=NULL, type="source")
dim<-12
N<-100
s<-0.5
dat<-creat_sample(rep(1,dim)/sqrt(dim),N,fsquare,0.5)
test<-cve(Y~.,data=as.data.frame(dat),k=1)
##############
#initialize model parameterss
m<-100 #number of replications in simulation
dim<-12 #dimension of random variable X
truedim<-2 #dimension of B=b
qs<-dim-truedim # dimension of orthogonal complement of B
b1=c(1,1,1,1,1,1,0,0,0,0,0,0)/sqrt(6)
b2=c(1,-1,1,-1,1,-1,0,0,0,0,0,0)/sqrt(6)
b<-cbind(b1,b2)
#b<-b1
P<-b%*%t(b)
sigma=0.5 #error standard deviation
N<-70 #sample size
K<-30 #number of arbitrary starting values for curvilinear optimization
MAXIT<-30 #maximal number of iterations in curvilinear search algorithm
var_vec<-mat.or.vec(m,12)
M1_weight<-mat.or.vec(m,13)
#colnames(M1_weight)<-c('CVE1','CVE2','CVE3','CVE1_Rcpp','CVE2_Rcpp','CVE3_Rcpp','meanMAVE','csMAVE','phd','sir','save','CVE4')
#link function for M1
fM1<-function(x){return(x[1]/(0.5+(x[2]+1.5)^2))}
for (i in 1:m){
#generate dat according to M1
dat<-creat_sample_nor_nonstand(b,N,fsquare,diag(rep(1,dim)),sigma)
#est sample covariance matrix
Sig_est<-est_varmat(dat[,-1])
#est trace of sample covariance matrix
tr<-var_tr(Sig_est)
#calculates Vhat_k for CVE1,CVE2, CVE3 for k=qs
CVE1<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0)
CVE2<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0)
CVE3<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0)
CVE4<-stiefl_weight_partial_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0)
CVE5<-stiefl_weight_partial_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0)
CVE6<-stiefl_weight_partial_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0)
#
CVE7<-stiefl_weight_full_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0)
CVE8<-stiefl_weight_full_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0)
CVE9<-stiefl_weight_full_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0)
#
#
var_vec[i,1]<-CVE1$var
var_vec[i,2]<-CVE2$var
var_vec[i,3]<-CVE3$var
var_vec[i,4]<-CVE4$var
var_vec[i,5]<-CVE5$var
var_vec[i,6]<-CVE6$var
#
var_vec[i,7]<-CVE7$var
var_vec[i,8]<-CVE8$var
var_vec[i,9]<-CVE9$var
CVE1$est_base<-fill_base(CVE1$est_base)
CVE2$est_base<-fill_base(CVE2$est_base)
CVE3$est_base<-fill_base(CVE3$est_base)
CVE4$est_base<-fill_base(CVE4$est_base)
CVE5$est_base<-fill_base(CVE5$est_base)
CVE6$est_base<-fill_base(CVE6$est_base)
CVE7$est_base<-fill_base(CVE7$est_base)
CVE8$est_base<-fill_base(CVE8$est_base)
CVE9$est_base<-fill_base(CVE9$est_base)
# calculate distance between true B and estimated B
M1_weight[i,1]<-subspace_dist(CVE1$est_base[,1:truedim],b)
M1_weight[i,2]<-subspace_dist(CVE2$est_base[,1:truedim],b)
M1_weight[i,3]<-subspace_dist(CVE3$est_base[,1:truedim],b)
M1_weight[i,4]<-subspace_dist(CVE4$est_base[,1:truedim],b)
M1_weight[i,5]<-subspace_dist(CVE5$est_base[,1:truedim],b)
M1_weight[i,6]<-subspace_dist(CVE6$est_base[,1:truedim],b)
M1_weight[i,7]<-subspace_dist(CVE7$est_base[,1:truedim],b)
M1_weight[i,8]<-subspace_dist(CVE8$est_base[,1:truedim],b)
M1_weight[i,9]<-subspace_dist(CVE9$est_base[,1:truedim],b)
CVE1_Rcpp<-cve(Y~.,data=as.data.frame(dat),k=truedim,nObs=N^0.8,attempts=K,tol=10^(-3),slack=0)[[2]]
CVE2_Rcpp<-cve(Y~.,data=as.data.frame(dat),k=truedim,nObs=N^(2/3),attempts=K,tol=10^(-3),slack=0)[[2]]
CVE3_Rcpp<-cve(Y~.,data=as.data.frame(dat),k=truedim,nObs=N^0.5,attempts=K,tol=10^(-3),slack=0)[[2]]
# CVE4_Rcpp<-cve(Y~.,data=as.data.frame(dat),k=truedim,h=h_opt,attempts=K,tol=10^(-3))[[2]]
#M1_Rcpp[i,12]<-subspace_dist(CVE4_Rcpp$B,b)
var_vec[i,10]<-CVE1_Rcpp$loss
var_vec[i,11]<-CVE2_Rcpp$loss
var_vec[i,12]<-CVE3_Rcpp$loss
#calculate orthogonal complement of Vhat_k
#i.e. CVE1$est_base[,1:truedim] is estimator for B with dimension (dim times (dim-qs))
M1_weight[i,10]<-subspace_dist(CVE1_Rcpp$B,b)
M1_weight[i,11]<-subspace_dist(CVE2_Rcpp$B,b)
M1_weight[i,12]<-subspace_dist(CVE3_Rcpp$B,b)
#meanMAVE
mod_t2<-mave(Y~.,data=as.data.frame(dat),method = 'meanMAVE')
M1_weight[i,13]<-subspace_dist(mod_t2$dir[[truedim]],b)
print(paste(i,paste('/',m)))
}
boxplot(M1_weight[1:(i-1),]/sqrt(2*truedim),names=colnames(M1_weight),ylab='err',main='M1')
summary(M1_weight[1:(i-1),])

View File

@ -0,0 +1,222 @@
LV_weight_partial<-function(V,Xl,dtemp,h,q,Y,grad=T){
N<-length(Y)
if(is.vector(V)){k<-1}
else{k<-length(V[1,])}
Xlv<-Xl%*%V
d<-dtemp-((Xlv^2)%*%rep(1,k))
w<-exp(-0.5*(d/h)^2)#dnorm(d/h)/dnorm(0)
w<-matrix(w,N,q)
wn<-apply(w,2,sum)#new
w<-apply(w,2,column_normalize)
mY<-t(w)%*%Y
sig<-t(w)%*%(Y^2)-(mY)^2
W<-diag(kronecker(t(wn),rep(1,N)))##new
if(grad==T){
grad<-mat.or.vec(dim,k)
tmp1<-(kronecker(sig,rep(1,N))-(as.vector(kronecker(rep(1,q),Y))-kronecker(mY,rep(1,N)))^2)
if(k==1){
grad_d<- -2*Xl*as.vector(Xlv)
grad<-(1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1 #new
# wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N))
# grad<- wn_grad%*%(sig-rep(var1[2],q))/(sum(wn))+grad
}
else{
for (j in 1:(k)){
grad_d<- -2*Xl*as.vector(Xlv[,j])
grad[,j]<- (1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1#new
# wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N))
# grad[,j]<- wn_grad%*%(sig-rep(var1[2],q))/(sum(wn))+grad
}
}
ret<-list(t(wn)%*%sig/sum(wn),sig,grad)#new
names(ret)<-c('var','sig','grad')
}
else{
ret<-list(t(wn)%*%sig/sum(wn),sig)#new
names(ret)<-c('var','sig')
}
return(ret)
}
######
LV_weight_full<-function(V,Xl,dtemp,h,q,Y,grad=T){
N<-length(Y)
if(is.vector(V)){k<-1}
else{k<-length(V[1,])}
Xlv<-Xl%*%V
d<-dtemp-((Xlv^2)%*%rep(1,k))
w<-exp(-0.5*(d/h)^2)#dnorm(d/h)/dnorm(0)
w<-matrix(w,N,q)
wn<-apply(w,2,sum)#new
w<-apply(w,2,column_normalize)
mY<-t(w)%*%Y
sig<-t(w)%*%(Y^2)-(mY)^2
W<-(kronecker(t(wn),rep(1,N)))#new or ###diag(kronecker(t(wn),rep(1,N)))
var<-t(wn)%*%sig/sum(wn)#new
if(grad==T){
grad<-mat.or.vec(dim,k)
tmp1<-(kronecker(sig,rep(1,N))-(as.vector(kronecker(rep(1,q),Y))-kronecker(mY,rep(1,N)))^2)
if(k==1){
grad_d<- -2*Xl*as.vector(Xlv)
grad<-(1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1#new
wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N))#new
grad<- wn_grad%*%(sig-rep(var,q))/(sum(wn))+grad#new
}
else{
for (j in 1:(k)){
grad_d<- -2*Xl*as.vector(Xlv[,j])
grad[,j]<- (1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1#new
wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N))#new
grad[,j]<- wn_grad%*%(sig-rep(var,q))/(sum(wn))+grad[,j]#new
}
}
ret<-list(var,sig,grad)
names(ret)<-c('var','sig','grad')
}
else{
ret<-list(var,sig)
names(ret)<-c('var','sig')
}
return(ret)
}
####
stiefl_weight_partial_opt<-function(dat,h=NULL,k,k0=30,p=1,maxit=50,nObs=sqrt(length(dat[,1])),lambda_0=1,tol=10^(-3),sclack_para=0){
Y<-dat[,1]
X<-dat[,-1]
N<-length(Y)
dim<-length(X[1,])
if(p<1){
S<-est_varmat(X)
tmp1<-q_ind(X,S,p)
q<-tmp1$q
ind<-tmp1$ind
}
else{
q<-N
ind<-1:N
}
Xl<-(kronecker(rep(1,q),X)-kronecker(X[ind,],rep(1,N)))
dtemp<-apply(Xl,1,norm2)
if(is.null(h)){
S<-est_varmat(X)
tr<-var_tr(S)
h<-choose_h_2(dim,k,N,nObs,tr)
}
best<-exp(10000)
Vend<-mat.or.vec(dim,k)
sig<-mat.or.vec(q,1)
for(u in 1:k0){
Vnew<-Vold<-stiefl_startval(dim,k)
#print(Vold)
#print(LV(Vold,Xl,dtemp,h,q,Y)$var)
Lnew<-Lold<-exp(10000)
lambda<-lambda_0
err<-10
count<-0
count2<-0
while(err>tol&count<maxit){
#print(Vold)
tmp2<-LV_weight_partial(Vold,Xl,dtemp,h,q,Y)
G<-tmp2$grad
Lold<-tmp2$var
W<-G%*%t(Vold)-Vold%*%t(G)
stepsize<-lambda#/(2*sqrt(count+1))
Vnew<-solve(diag(1,dim)+stepsize*W)%*%(diag(1,dim)-stepsize*W)%*%Vold
# print(Vnew)
tmp3<-LV_weight_partial(Vnew,Xl,dtemp,h,q,Y,grad=F)
Lnew<-tmp3$var
err<-sqrt(sum((Vold%*%t(Vold)-Vnew%*%t(Vnew))^2))/sqrt(2*k)#sqrt(sum(tmp3$grad^2))/(dim*k)#
#print(err)
if(((Lnew-Lold)/Lold) > sclack_para){#/(count+1)^(0.5)
lambda=lambda/2
err<-10
count2<-count2+1
count<-count-1
Vnew<-Vold #!!!!!
}
Vold<-Vnew
count<-count+1
#print(count)
}
if(best>Lnew){
best<-Lnew
Vend<-Vnew
sig<-tmp3$sig
}
}
ret<-list(Vend,best,sig,count,h,count2)
names(ret)<-c('est_base','var','aov_dat','count','h','count2')
return(ret)
}
###########
stiefl_weight_full_opt<-function(dat,h=NULL,k,k0=30,p=1,maxit=50,nObs=sqrt(length(dat[,1])),lambda_0=1,tol=10^(-3),sclack_para=0){
Y<-dat[,1]
X<-dat[,-1]
N<-length(Y)
dim<-length(X[1,])
if(p<1){
S<-est_varmat(X)
tmp1<-q_ind(X,S,p)
q<-tmp1$q
ind<-tmp1$ind
}
else{
q<-N
ind<-1:N
}
Xl<-(kronecker(rep(1,q),X)-kronecker(X[ind,],rep(1,N)))
dtemp<-apply(Xl,1,norm2)
if(is.null(h)){
S<-est_varmat(X)
tr<-var_tr(S)
h<-choose_h_2(dim,k,N,nObs,tr)
}
best<-exp(10000)
Vend<-mat.or.vec(dim,k)
sig<-mat.or.vec(q,1)
for(u in 1:k0){
Vnew<-Vold<-stiefl_startval(dim,k)
#print(Vold)
#print(LV(Vold,Xl,dtemp,h,q,Y)$var)
Lnew<-Lold<-exp(10000)
lambda<-lambda_0
err<-10
count<-0
count2<-0
while(err>tol&count<maxit){
#print(Vold)
tmp2<-LV_weight_full(Vold,Xl,dtemp,h,q,Y)
G<-tmp2$grad
Lold<-tmp2$var
W<-G%*%t(Vold)-Vold%*%t(G)
stepsize<-lambda#/(2*sqrt(count+1))
Vnew<-solve(diag(1,dim)+stepsize*W)%*%(diag(1,dim)-stepsize*W)%*%Vold
# print(Vnew)
tmp3<-LV_weight_full(Vnew,Xl,dtemp,h,q,Y,grad=F)
Lnew<-tmp3$var
err<-sqrt(sum((Vold%*%t(Vold)-Vnew%*%t(Vnew))^2))/sqrt(2*k)#sqrt(sum(tmp3$grad^2))/(dim*k)#
#print(err)
if(((Lnew-Lold)/Lold) > sclack_para){#/(count+1)^(0.5)
lambda=lambda/2
err<-10
count2<-count2+1
count<-count-1
Vnew<-Vold #!!!!!
}
Vold<-Vnew
count<-count+1
#print(count)
}
if(best>Lnew){
best<-Lnew
Vend<-Vnew
sig<-tmp3$sig
}
}
ret<-list(Vend,best,sig,count,h,count2)
names(ret)<-c('est_base','var','aov_dat','count','h','count2')
return(ret)
}

46
LaTeX/notes.tex Normal file
View File

@ -0,0 +1,46 @@
\documentclass[12pt,a4paper]{article}
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{amsmath, amsfonts, amssymb, amsthm}
\usepackage{fullpage}
\newcommand{\vecl}{\ensuremath{\operatorname{vec}_l}}
\newcommand{\Sym}{\ensuremath{\operatorname{Sym}}}
\begin{document}
Indexing a given matrix $A = (a_{ij})_{i,j = 1, ..., n} \in \mathbb{R}^{n\times n}$ given as
\begin{displaymath}
A = \begin{pmatrix}
a_{0,0} & a_{0,1} & a_{0,2} & \ldots & a_{0,n-1} \\
a_{1,0} & a_{1,1} & a_{1,2} & \ldots & a_{1,n-1} \\
a_{2,0} & a_{2,1} & a_{2,2} & \ldots & a_{2,n-1} \\
\vdots & \vdots & \vdots & \ddots & \vdots \\
a_{n-1,0} & a_{n-1,1} & a_{n-1,2} & \ldots & a_{n-1,n-1}
\end{pmatrix}
\end{displaymath}
A symmetric matrix with zero main diagonal, meaning a matrix $S = S^T$ with $S_{i,i} = 0,\ \forall i = 1,..,n$ is givne in the following form
\begin{displaymath}
S = \begin{pmatrix}
0 & s_{1,0} & s_{2,0} & \ldots & s_{n-1,0} \\
s_{1,0} & 0 & s_{2,1} & \ldots & s_{n-1,1} \\
s_{2,0} & s_{2,1} & 0 & \ldots & s_{n-1,2} \\
\vdots & \vdots & \vdots & \ddots & \vdots \\
s_{n-1,0} & s_{n-1,1} & s_{n-1,2} & \ldots & 0
\end{pmatrix}
\end{displaymath}
Therefore its sufficient to store only the lower triangular part, for memory efficiency and some further alrogithmic shortcuts (sometime they are more expencife) the symmetric matrix $S$ is stored in packed form, meanin in a vector of the length $\frac{n(n-1)}{2}$. We use (like for matrices) a column-major order of elements and define the $\vecl:\Sym(n)\to \mathbb{R}^{n(n-1) / 2}$ opperator defined as
\begin{displaymath}
\vecl(S) = (s_{1,0}, s_{2,0},\cdots,s_{n-1,0},s_{2,1}\cdots,s_{n-1,n-2})^T
\end{displaymath}
The relation between the matrix indices $i,j$ and the $\vecl$ index $k$ is given by
\begin{displaymath}
(\vecl(S)_k = s_{i,j} \quad\Leftrightarrow\quad k = jn+i) : j \in \{0,...,n-2\} \land j < i < n.
\end{displaymath}
\end{document}