cleanup
This commit is contained in:
parent
8db5266f8e
commit
50302ce18a
|
@ -5,15 +5,15 @@ S3method(summary,cve)
|
|||
export(cve)
|
||||
export(cve.call)
|
||||
export(cve.grid.search)
|
||||
export(cve_linesearch)
|
||||
export(cve_sgd)
|
||||
export(cve_simple)
|
||||
export(dataset)
|
||||
export(elem.pairs)
|
||||
export(estimate.bandwidth)
|
||||
export(grad)
|
||||
export(null)
|
||||
export(projTangentStiefl)
|
||||
export(rStiefl)
|
||||
export(retractStiefl)
|
||||
export(skew)
|
||||
export(sym)
|
||||
import(stats)
|
||||
importFrom(graphics,lines)
|
||||
importFrom(graphics,plot)
|
||||
|
|
|
@ -1,169 +0,0 @@
|
|||
#' Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe
|
||||
#' conditions.
|
||||
#'
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
cve_linesearch <- function(X, Y, k,
|
||||
nObs = sqrt(nrow(X)),
|
||||
h = NULL,
|
||||
tau = 1.0,
|
||||
tol = 1e-3,
|
||||
rho1 = 0.1,
|
||||
rho2 = 0.9,
|
||||
slack = 0,
|
||||
epochs = 50L,
|
||||
attempts = 10L,
|
||||
max.linesearch.iter = 10L,
|
||||
logger = NULL
|
||||
) {
|
||||
# Set `grad` functions environment to enable if to find this environments
|
||||
# local variabels, needed to enable the manipulation of this local variables
|
||||
# from within `grad`.
|
||||
environment(grad) <- environment()
|
||||
|
||||
# Get dimensions.
|
||||
n <- nrow(X)
|
||||
p <- ncol(X)
|
||||
q <- p - k
|
||||
|
||||
# Save initial learning rate `tau`.
|
||||
tau.init <- tau
|
||||
# Addapt tolearance for break condition.
|
||||
tol <- sqrt(2 * q) * tol
|
||||
|
||||
# Estaimate bandwidth if not given.
|
||||
if (missing(h) | !is.numeric(h)) {
|
||||
h <- estimate.bandwidth(X, k, nObs)
|
||||
}
|
||||
|
||||
# Compute persistent data.
|
||||
# Compute lookup indexes for symmetrie, lower/upper
|
||||
# triangular parts and vectorization.
|
||||
pair.index <- elem.pairs(seq(n))
|
||||
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
|
||||
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
|
||||
# Matrix of vectorized indices. (vec(index) -> seq)
|
||||
index <- matrix(seq(n * n), n, n)
|
||||
lower <- index[lower.tri(index)]
|
||||
upper <- t(index)[lower]
|
||||
|
||||
# Create all pairewise differences of rows of `X`.
|
||||
X_diff <- X[i, , drop = F] - X[j, , drop = F]
|
||||
# Identity matrix.
|
||||
I_p <- diag(1, p)
|
||||
|
||||
# Init tracking of current best (according multiple attempts).
|
||||
V.best <- NULL
|
||||
loss.best <- Inf
|
||||
|
||||
# Start loop for multiple attempts.
|
||||
for (attempt in 1:attempts) {
|
||||
|
||||
# Sample a `(p, q)` dimensional matrix from the stiefel manifold as
|
||||
# optimization start value.
|
||||
V <- rStiefl(p, q)
|
||||
|
||||
# Initial loss and gradient.
|
||||
loss <- Inf
|
||||
G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
|
||||
# Set last loss (aka, loss after applying the step).
|
||||
loss.last <- loss
|
||||
|
||||
# Call logger with initial values before starting optimization.
|
||||
if (is.function(logger)) {
|
||||
epoch <- 0 # Set epoch count to 0 (only relevant for logging).
|
||||
error <- NA
|
||||
logger(environment())
|
||||
}
|
||||
|
||||
## Start optimization loop.
|
||||
for (epoch in 1:epochs) {
|
||||
|
||||
# Cayley transform matrix `A`
|
||||
A <- (G %*% t(V)) - (V %*% t(G))
|
||||
|
||||
# Directional derivative of the loss at current position, given
|
||||
# as `Tr(G^T \cdot A \cdot V)`.
|
||||
loss.prime <- -0.5 * norm(A, type = 'F')^2
|
||||
|
||||
# Linesearch
|
||||
tau.upper <- Inf
|
||||
tau.lower <- 0
|
||||
tau <- tau.init
|
||||
for (iter in 1:max.linesearch.iter) {
|
||||
# Apply learning rate `tau`.
|
||||
A.tau <- (tau / 2) * A
|
||||
# Parallet transport (on Stiefl manifold) into direction of `G`.
|
||||
inv <- solve(I_p + A.tau)
|
||||
V.tau <- inv %*% ((I_p - A.tau) %*% V)
|
||||
|
||||
# Loss at position after a step.
|
||||
loss <- Inf # aka loss.tau
|
||||
G.tau <- grad(X, Y, V.tau, h, loss.out = TRUE, persistent = TRUE)
|
||||
|
||||
# Armijo condition.
|
||||
if (loss > loss.last + (rho1 * tau * loss.prime)) {
|
||||
tau.upper <- tau
|
||||
tau <- (tau.lower + tau.upper) / 2
|
||||
next()
|
||||
}
|
||||
|
||||
V.prime.tau <- -0.5 * inv %*% A %*% (V + V.tau)
|
||||
loss.prime.tau <- sum(G * V.prime.tau) # Tr(grad(tau)^T \cdot Y^'(tau))
|
||||
|
||||
# Wolfe condition.
|
||||
if (loss.prime.tau < rho2 * loss.prime) {
|
||||
tau.lower <- tau
|
||||
if (tau.upper == Inf) {
|
||||
tau <- 2 * tau.lower
|
||||
} else {
|
||||
tau <- (tau.lower + tau.upper) / 2
|
||||
}
|
||||
} else {
|
||||
break()
|
||||
}
|
||||
}
|
||||
|
||||
# Compute error.
|
||||
error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
|
||||
|
||||
# Check break condition (epoch check to skip ignored gradient calc).
|
||||
# Note: the devision by `sqrt(2 * k)` is included in `tol`.
|
||||
if (error < tol | epoch >= epochs) {
|
||||
# take last step and stop optimization.
|
||||
V <- V.tau
|
||||
# Final call to the logger before stopping optimization
|
||||
if (is.function(logger)) {
|
||||
G <- G.tau
|
||||
logger(environment())
|
||||
}
|
||||
break()
|
||||
}
|
||||
|
||||
# Perform the step and remember previous loss.
|
||||
V <- V.tau
|
||||
loss.last <- loss
|
||||
G <- G.tau
|
||||
|
||||
# Log after taking current step.
|
||||
if (is.function(logger)) {
|
||||
logger(environment())
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
# Check if current attempt improved previous ones
|
||||
if (loss < loss.best) {
|
||||
loss.best <- loss
|
||||
V.best <- V
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return(list(
|
||||
loss = loss.best,
|
||||
V = V.best,
|
||||
B = null(V.best),
|
||||
h = h
|
||||
))
|
||||
}
|
|
@ -1,48 +0,0 @@
|
|||
#' Compute get gradient of `L(V)` given a dataset `X`.
|
||||
#'
|
||||
#' @param X Data matrix.
|
||||
#' @param Y Responce.
|
||||
#' @param V Position to compute the gradient at, aka point on Stiefl manifold.
|
||||
#' @param h Bandwidth
|
||||
#' @param loss.out Iff \code{TRUE} loss will be written to parent environment.
|
||||
#' @param loss.only Boolean to only compute the loss, of \code{TRUE} a single
|
||||
#' value loss is returned and \code{envir} is ignored.
|
||||
#' @param persistent Determines if data indices and dependent calculations shall
|
||||
#' be reused from the parent environment. ATTENTION: Do NOT set this flag, only
|
||||
#' intended for internal usage by carefully aligned functions!
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
grad <- function(X, Y, V, h,
|
||||
loss.out = FALSE,
|
||||
loss.only = FALSE,
|
||||
persistent = FALSE) {
|
||||
# Get number of samples and dimension.
|
||||
n <- nrow(X)
|
||||
p <- ncol(X)
|
||||
|
||||
if (!persistent) {
|
||||
# Compute lookup indexes for symmetrie, lower/upper
|
||||
# triangular parts and vectorization.
|
||||
pair.index <- elem.pairs(seq(n))
|
||||
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
|
||||
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
|
||||
# Index of vectorized matrix, for lower and upper triangular part.
|
||||
lower <- ((i - 1) * n) + j
|
||||
upper <- ((j - 1) * n) + i
|
||||
|
||||
# Create all pairewise differences of rows of `X`.
|
||||
X_diff <- X[i, , drop = F] - X[j, , drop = F]
|
||||
}
|
||||
|
||||
out <- .Call("grad_c", PACKAGE = "CVE",
|
||||
X, X_diff, as.double(Y), V, as.double(h));
|
||||
|
||||
if (loss.only) {
|
||||
return(out$loss)
|
||||
}
|
||||
if (loss.out) {
|
||||
loss <<- out$loss
|
||||
}
|
||||
|
||||
return(out$G)
|
||||
}
|
|
@ -1,4 +1,4 @@
|
|||
#' Samples uniform from the Stiefel Manifold
|
||||
#' Samples uniform from the Stiefl Manifold.
|
||||
#'
|
||||
#' @param p row dim.
|
||||
#' @param q col dim.
|
||||
|
@ -10,6 +10,48 @@ rStiefl <- function(p, q) {
|
|||
return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))))
|
||||
}
|
||||
|
||||
#' Retraction to the manifold.
|
||||
#'
|
||||
#' @param A matrix.
|
||||
#' @return `(p, q)` semi-orthogonal matrix, aka element of the Stiefl manifold.
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
retractStiefl <- function(A) {
|
||||
return(qr.Q(qr(A)))
|
||||
}
|
||||
|
||||
#' Skew-Symmetric matrix computed from `A` as
|
||||
#' \eqn{1/2 (A - A^T)}.
|
||||
#' @param A Matrix of dim `(p, q)`
|
||||
#' @return Skew-Symmetric matrix of dim `(p, p)`.
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
skew <- function(A) {
|
||||
0.5 * (A - t(A))
|
||||
}
|
||||
|
||||
#' Symmetric matrix computed from `A` as
|
||||
#' \eqn{1/2 (A + A^T)}.
|
||||
#' @param A Matrix of dim `(p, q)`
|
||||
#' @return Symmetric matrix of dim `(p, p)`.
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
sym <- function(A) {
|
||||
0.5 * (A + t(A))
|
||||
}
|
||||
|
||||
#' Orthogonal Projection onto the tangent space of the stiefl manifold.
|
||||
#'
|
||||
#' @param V Point on the stiefl manifold.
|
||||
#' @param G matrix to be projected onto the tangent space at `V`.
|
||||
#' @return `(p, q)` matrix as element of the tangent space at `V`.
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
projTangentStiefl <- function(V, G) {
|
||||
Q <- diag(1, nrow(V)) - V %*% t(V)
|
||||
return(Q %*% G + V %*% skew(t(V) %*% G))
|
||||
}
|
||||
|
||||
#' Null space basis of given matrix `V`
|
||||
#'
|
||||
#' @param V `(p, q)` matrix
|
||||
|
@ -19,7 +61,7 @@ rStiefl <- function(p, q) {
|
|||
null <- function(V) {
|
||||
tmp <- qr(V)
|
||||
set <- if(tmp$rank == 0L) seq_len(ncol(V)) else -seq_len(tmp$rank)
|
||||
return(qr.Q(tmp, complete=TRUE)[, set, drop=FALSE])
|
||||
return(qr.Q(tmp, complete = TRUE)[, set, drop = FALSE])
|
||||
}
|
||||
|
||||
#' Creates a (numeric) matrix where each column contains
|
||||
|
|
|
@ -5,10 +5,11 @@
|
|||
\alias{cve.call}
|
||||
\title{Implementation of the CVE method.}
|
||||
\usage{
|
||||
cve(formula, data, method = "simple", max.dim = 10, ...)
|
||||
cve(formula, data, method = "simple", max.dim = 10L, ...)
|
||||
|
||||
cve.call(X, Y, method = "simple", nObs = nrow(X)^0.5, min.dim = 1,
|
||||
max.dim = 10, k, ...)
|
||||
cve.call(X, Y, method = "simple", nObs = sqrt(nrow(X)), h = NULL,
|
||||
min.dim = 1L, max.dim = 10L, k = NULL, tau = 1, tol = 0.001,
|
||||
epochs = 50L, attempts = 10L, logger = NULL)
|
||||
}
|
||||
\arguments{
|
||||
\item{formula}{Formel for the regression model defining `X`, `Y`.
|
||||
|
|
|
@ -1,16 +0,0 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/cve_linesearch.R
|
||||
\name{cve_linesearch}
|
||||
\alias{cve_linesearch}
|
||||
\title{Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe
|
||||
conditions.}
|
||||
\usage{
|
||||
cve_linesearch(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 1,
|
||||
tol = 0.001, rho1 = 0.1, rho2 = 0.9, slack = 0, epochs = 50L,
|
||||
attempts = 10L, max.linesearch.iter = 10L, logger = NULL)
|
||||
}
|
||||
\description{
|
||||
Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe
|
||||
conditions.
|
||||
}
|
||||
\keyword{internal}
|
|
@ -1,16 +0,0 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/cve_sgd.R
|
||||
\name{cve_sgd}
|
||||
\alias{cve_sgd}
|
||||
\title{Simple implementation of the CVE method. 'Simple' means that this method is
|
||||
a classic GD method unsing no further tricks.}
|
||||
\usage{
|
||||
cve_sgd(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 0.01,
|
||||
tol = 0.001, epochs = 50L, batch.size = 16L, attempts = 10L,
|
||||
logger = NULL)
|
||||
}
|
||||
\description{
|
||||
Simple implementation of the CVE method. 'Simple' means that this method is
|
||||
a classic GD method unsing no further tricks.
|
||||
}
|
||||
\keyword{internal}
|
|
@ -1,16 +0,0 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/cve_simple.R
|
||||
\name{cve_simple}
|
||||
\alias{cve_simple}
|
||||
\title{Simple implementation of the CVE method. 'Simple' means that this method is
|
||||
a classic GD method unsing no further tricks.}
|
||||
\usage{
|
||||
cve_simple(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 1,
|
||||
tol = 0.001, slack = 0, epochs = 50L, attempts = 10L,
|
||||
logger = NULL)
|
||||
}
|
||||
\description{
|
||||
Simple implementation of the CVE method. 'Simple' means that this method is
|
||||
a classic GD method unsing no further tricks.
|
||||
}
|
||||
\keyword{internal}
|
|
@ -1,31 +0,0 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/gradient.R
|
||||
\name{grad}
|
||||
\alias{grad}
|
||||
\title{Compute get gradient of `L(V)` given a dataset `X`.}
|
||||
\usage{
|
||||
grad(X, Y, V, h, loss.out = FALSE, loss.only = FALSE,
|
||||
persistent = FALSE)
|
||||
}
|
||||
\arguments{
|
||||
\item{X}{Data matrix.}
|
||||
|
||||
\item{Y}{Responce.}
|
||||
|
||||
\item{V}{Position to compute the gradient at, aka point on Stiefl manifold.}
|
||||
|
||||
\item{h}{Bandwidth}
|
||||
|
||||
\item{loss.out}{Iff \code{TRUE} loss will be written to parent environment.}
|
||||
|
||||
\item{loss.only}{Boolean to only compute the loss, of \code{TRUE} a single
|
||||
value loss is returned and \code{envir} is ignored.}
|
||||
|
||||
\item{persistent}{Determines if data indices and dependent calculations shall
|
||||
be reused from the parent environment. ATTENTION: Do NOT set this flag, only
|
||||
intended for internal usage by carefully aligned functions!}
|
||||
}
|
||||
\description{
|
||||
Compute get gradient of `L(V)` given a dataset `X`.
|
||||
}
|
||||
\keyword{internal}
|
|
@ -0,0 +1,20 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/util.R
|
||||
\name{projTangentStiefl}
|
||||
\alias{projTangentStiefl}
|
||||
\title{Orthogonal Projection onto the tangent space of the stiefl manifold.}
|
||||
\usage{
|
||||
projTangentStiefl(V, G)
|
||||
}
|
||||
\arguments{
|
||||
\item{V}{Point on the stiefl manifold.}
|
||||
|
||||
\item{G}{matrix to be projected onto the tangent space at `V`.}
|
||||
}
|
||||
\value{
|
||||
`(p, q)` matrix as element of the tangent space at `V`.
|
||||
}
|
||||
\description{
|
||||
Orthogonal Projection onto the tangent space of the stiefl manifold.
|
||||
}
|
||||
\keyword{internal}
|
|
@ -2,7 +2,7 @@
|
|||
% Please edit documentation in R/util.R
|
||||
\name{rStiefl}
|
||||
\alias{rStiefl}
|
||||
\title{Samples uniform from the Stiefel Manifold}
|
||||
\title{Samples uniform from the Stiefl Manifold.}
|
||||
\usage{
|
||||
rStiefl(p, q)
|
||||
}
|
||||
|
@ -15,7 +15,7 @@ rStiefl(p, q)
|
|||
`(p, q)` semi-orthogonal matrix
|
||||
}
|
||||
\description{
|
||||
Samples uniform from the Stiefel Manifold
|
||||
Samples uniform from the Stiefl Manifold.
|
||||
}
|
||||
\examples{
|
||||
V <- rStiefel(6, 4)
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/util.R
|
||||
\name{retractStiefl}
|
||||
\alias{retractStiefl}
|
||||
\title{Retraction to the manifold.}
|
||||
\usage{
|
||||
retractStiefl(A)
|
||||
}
|
||||
\arguments{
|
||||
\item{A}{matrix.}
|
||||
}
|
||||
\value{
|
||||
`(p, q)` semi-orthogonal matrix, aka element of the Stiefl manifold.
|
||||
}
|
||||
\description{
|
||||
Retraction to the manifold.
|
||||
}
|
||||
\keyword{internal}
|
|
@ -0,0 +1,20 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/util.R
|
||||
\name{skew}
|
||||
\alias{skew}
|
||||
\title{Skew-Symmetric matrix computed from `A` as
|
||||
\eqn{1/2 (A - A^T)}.}
|
||||
\usage{
|
||||
skew(A)
|
||||
}
|
||||
\arguments{
|
||||
\item{A}{Matrix of dim `(p, q)`}
|
||||
}
|
||||
\value{
|
||||
Skew-Symmetric matrix of dim `(p, p)`.
|
||||
}
|
||||
\description{
|
||||
Skew-Symmetric matrix computed from `A` as
|
||||
\eqn{1/2 (A - A^T)}.
|
||||
}
|
||||
\keyword{internal}
|
|
@ -0,0 +1,20 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/util.R
|
||||
\name{sym}
|
||||
\alias{sym}
|
||||
\title{Symmetric matrix computed from `A` as
|
||||
\eqn{1/2 (A + A^T)}.}
|
||||
\usage{
|
||||
sym(A)
|
||||
}
|
||||
\arguments{
|
||||
\item{A}{Matrix of dim `(p, q)`}
|
||||
}
|
||||
\value{
|
||||
Symmetric matrix of dim `(p, p)`.
|
||||
}
|
||||
\description{
|
||||
Symmetric matrix computed from `A` as
|
||||
\eqn{1/2 (A + A^T)}.
|
||||
}
|
||||
\keyword{internal}
|
|
@ -12,7 +12,7 @@ void cve_simple_sub(const int n, const int p, const int q,
|
|||
double *V, double *L,
|
||||
SEXP logger, SEXP loggerEnv) {
|
||||
|
||||
int attempt, epoch, i, j, k, nn = (n * (n - 1)) / 2;
|
||||
int attempt, epoch, i, nn = (n * (n - 1)) / 2;
|
||||
double loss, loss_last, loss_best, err, tau;
|
||||
double tol = tol_init * sqrt((double)(2 * q));
|
||||
double gKscale = -0.5 / h;
|
||||
|
|
175
CVE_R/R/CVE.R
175
CVE_R/R/CVE.R
|
@ -10,6 +10,7 @@
|
|||
#'
|
||||
#' @docType package
|
||||
#' @author Loki
|
||||
#' @useDynLib CVE, .registration = TRUE
|
||||
"_PACKAGE"
|
||||
|
||||
#' Implementation of the CVE method.
|
||||
|
@ -54,21 +55,21 @@
|
|||
#' @import stats
|
||||
#' @importFrom stats model.frame
|
||||
#' @export
|
||||
cve <- function(formula, data, method = "simple", max.dim = 10, ...) {
|
||||
cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
|
||||
# check for type of `data` if supplied and set default
|
||||
if (missing(data)) {
|
||||
data <- environment(formula)
|
||||
} else if (!is.data.frame(data)) {
|
||||
stop('Parameter `data` must be a `data.frame` or missing.')
|
||||
stop("Parameter 'data' must be a 'data.frame' or missing.")
|
||||
}
|
||||
|
||||
# extract `X`, `Y` from `formula` with `data`
|
||||
model <- stats::model.frame(formula, data)
|
||||
X <- as.matrix(model[,-1, drop = FALSE])
|
||||
Y <- as.matrix(model[, 1, drop = FALSE])
|
||||
X <- as.matrix(model[ ,-1L, drop = FALSE])
|
||||
Y <- as.double(model[ , 1L])
|
||||
|
||||
# pass extracted data on to [cve.call()]
|
||||
dr <- cve.call(X, Y, method = method, ...)
|
||||
dr <- cve.call(X, Y, method = method, max.dim = max.dim, ...)
|
||||
|
||||
# overwrite `call` property from [cve.call()]
|
||||
dr$call <- match.call()
|
||||
|
@ -83,35 +84,81 @@ cve <- function(formula, data, method = "simple", max.dim = 10, ...) {
|
|||
#' @param ... Method specific parameters.
|
||||
#' @rdname cve
|
||||
#' @export
|
||||
cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5,
|
||||
min.dim = 1, max.dim = 10, k, ...) {
|
||||
cve.call <- function(X, Y, method = "simple",
|
||||
nObs = sqrt(nrow(X)), h = NULL,
|
||||
min.dim = 1L, max.dim = 10L, k = NULL,
|
||||
tau = 1.0, tol = 1e-3,
|
||||
epochs = 50L, attempts = 10L,
|
||||
logger = NULL) {
|
||||
|
||||
# parameter checking
|
||||
if (!is.matrix(X)) {
|
||||
stop('X should be a matrices.')
|
||||
if (!(is.matrix(X) && is.numeric(X))) {
|
||||
stop("Parameter 'X' should be a numeric matrices.")
|
||||
}
|
||||
if (is.matrix(Y)) {
|
||||
Y <- as.vector(Y)
|
||||
if (!is.numeric(Y)) {
|
||||
stop("Parameter 'Y' must be numeric.")
|
||||
}
|
||||
if (is.matrix(Y) || !is.double(Y)) {
|
||||
Y <- as.double(Y)
|
||||
}
|
||||
if (nrow(X) != length(Y)) {
|
||||
stop('Rows of X and number of Y elements are not compatible.')
|
||||
stop("Rows of 'X' and 'Y' elements are not compatible.")
|
||||
}
|
||||
if (ncol(X) < 2) {
|
||||
stop('X is one dimensional, no need for dimension reduction.')
|
||||
stop("'X' is one dimensional, no need for dimension reduction.")
|
||||
}
|
||||
|
||||
if (!missing(k)) {
|
||||
min.dim <- as.integer(k)
|
||||
max.dim <- as.integer(k)
|
||||
} else {
|
||||
if (missing(k) || is.null(k)) {
|
||||
min.dim <- as.integer(min.dim)
|
||||
max.dim <- as.integer(min(max.dim, ncol(X) - 1L))
|
||||
} else {
|
||||
min.dim <- as.integer(k)
|
||||
max.dim <- as.integer(k)
|
||||
}
|
||||
if (min.dim > max.dim) {
|
||||
stop('`min.dim` bigger `max.dim`.')
|
||||
stop("'min.dim' bigger 'max.dim'.")
|
||||
}
|
||||
if (max.dim >= ncol(X)) {
|
||||
stop('`max.dim` must be smaller than `ncol(X)`.')
|
||||
stop("'max.dim' (or 'k') must be smaller than 'ncol(X)'.")
|
||||
}
|
||||
|
||||
if (is.function(h)) {
|
||||
estimate.bandwidth <- h
|
||||
h <- NULL
|
||||
}
|
||||
|
||||
if (!is.numeric(tau) || length(tau) > 1L || tau <= 0.0) {
|
||||
stop("Initial step-width 'tau' must be positive number.")
|
||||
} else {
|
||||
tau <- as.double(tau)
|
||||
}
|
||||
if (!is.numeric(tol) || length(tol) > 1L || tol < 0.0) {
|
||||
stop("Break condition tolerance 'tol' must be not negative number.")
|
||||
} else {
|
||||
tol <- as.double(tol)
|
||||
}
|
||||
|
||||
if (!is.numeric(epochs) || length(epochs) > 1L) {
|
||||
stop("Parameter 'epochs' must be positive integer.")
|
||||
} else if (!is.integer(epochs)) {
|
||||
epochs <- as.integer(epochs)
|
||||
}
|
||||
if (epochs < 1L) {
|
||||
stop("Parameter 'epochs' must be at least 1L.")
|
||||
}
|
||||
if (!is.numeric(attempts) || length(attempts) > 1L) {
|
||||
stop("Parameter 'attempts' must be positive integer.")
|
||||
} else if (!is.integer(attempts)) {
|
||||
attempts <- as.integer(attempts)
|
||||
}
|
||||
if (attempts < 1L) {
|
||||
stop("Parameter 'attempts' must be at least 1L.")
|
||||
}
|
||||
|
||||
if (is.function(logger)) {
|
||||
loggerEnv <- environment(logger)
|
||||
} else {
|
||||
loggerEnv <- NULL
|
||||
}
|
||||
|
||||
# Call specified method.
|
||||
|
@ -119,15 +166,40 @@ cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5,
|
|||
call <- match.call()
|
||||
dr <- list()
|
||||
for (k in min.dim:max.dim) {
|
||||
|
||||
if (missing(h) || is.null(h)) {
|
||||
h <- estimate.bandwidth(X, k, nObs)
|
||||
} else if (is.numeric(h) && h > 0.0) {
|
||||
h <- as.double(h)
|
||||
} else {
|
||||
stop("Bandwidth 'h' must be positive numeric.")
|
||||
}
|
||||
|
||||
if (method == 'simple') {
|
||||
dr.k <- cve_simple(X, Y, k, nObs = nObs, ...)
|
||||
} else if (method == 'linesearch') {
|
||||
dr.k <- cve_linesearch(X, Y, k, nObs = nObs, ...)
|
||||
} else if (method == 'sgd') {
|
||||
dr.k <- cve_sgd(X, Y, k, nObs = nObs, ...)
|
||||
dr.k <- .Call('cve_simple', PACKAGE = 'CVE',
|
||||
X, Y, k, h,
|
||||
tau, tol,
|
||||
epochs, attempts,
|
||||
logger, loggerEnv)
|
||||
# dr.k <- cve_simple(X, Y, k, nObs = nObs, ...)
|
||||
# } else if (method == 'linesearch') {
|
||||
# dr.k <- cve_linesearch(X, Y, k, nObs = nObs, ...)
|
||||
# } else if (method == 'rcg') {
|
||||
# dr.k <- cve_rcg(X, Y, k, nObs = nObs, ...)
|
||||
# } else if (method == 'momentum') {
|
||||
# dr.k <- cve_momentum(X, Y, k, nObs = nObs, ...)
|
||||
# } else if (method == 'rmsprob') {
|
||||
# dr.k <- cve_rmsprob(X, Y, k, nObs = nObs, ...)
|
||||
# } else if (method == 'sgdrmsprob') {
|
||||
# dr.k <- cve_sgdrmsprob(X, Y, k, nObs = nObs, ...)
|
||||
# } else if (method == 'sgd') {
|
||||
# dr.k <- cve_sgd(X, Y, k, nObs = nObs, ...)
|
||||
} else {
|
||||
stop('Got unknown method.')
|
||||
}
|
||||
dr.k$B <- null(dr.k$V)
|
||||
dr.k$loss <- mean(dr.k$L)
|
||||
dr.k$h <- h
|
||||
dr.k$k <- k
|
||||
class(dr.k) <- "cve.k"
|
||||
dr[[k]] <- dr.k
|
||||
|
@ -140,11 +212,6 @@ cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5,
|
|||
return(dr)
|
||||
}
|
||||
|
||||
# TODO: write summary
|
||||
# summary.cve <- function() {
|
||||
# # code #
|
||||
# }
|
||||
|
||||
#' Ploting helper for objects of class \code{cve}.
|
||||
#'
|
||||
#' @param x Object of class \code{cve} (result of [cve()]).
|
||||
|
@ -164,43 +231,19 @@ cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5,
|
|||
#' @method plot cve
|
||||
#' @export
|
||||
plot.cve <- function(x, ...) {
|
||||
|
||||
|
||||
# H <- x$history
|
||||
# H_1 <- H[!is.na(H[, 1]), 1]
|
||||
|
||||
# defaults <- list(
|
||||
# main = "History",
|
||||
# xlab = "Iterations i",
|
||||
# ylab = expression(loss == L[n](V^{(i)})),
|
||||
# xlim = c(1, nrow(H)),
|
||||
# ylim = c(0, max(H)),
|
||||
# type = "l"
|
||||
# )
|
||||
|
||||
# call.plot <- match.call()
|
||||
# keys <- names(defaults)
|
||||
# keys <- keys[match(keys, names(call.plot)[-1], nomatch = 0) == 0]
|
||||
|
||||
# for (key in keys) {
|
||||
# call.plot[[key]] <- defaults[[key]]
|
||||
# }
|
||||
|
||||
# call.plot[[1L]] <- quote(plot)
|
||||
# call.plot$x <- quote(1:length(H_1))
|
||||
# call.plot$y <- quote(H_1)
|
||||
|
||||
# eval(call.plot)
|
||||
|
||||
# if (ncol(H) > 1) {
|
||||
# for (i in 2:ncol(H)) {
|
||||
# H_i <- H[H[, i] > 0, i]
|
||||
# lines(1:length(H_i), H_i)
|
||||
# }
|
||||
# }
|
||||
# x.ends <- apply(H, 2, function(h) { length(h[!is.na(h)]) })
|
||||
# y.ends <- apply(H, 2, function(h) { tail(h[!is.na(h)], n=1) })
|
||||
# points(x.ends, y.ends)
|
||||
L <- c()
|
||||
k <- c()
|
||||
for (dr.k in x) {
|
||||
if (class(dr.k) == 'cve.k') {
|
||||
k <- c(k, paste0(dr.k$k))
|
||||
L <- c(L, dr.k$L)
|
||||
}
|
||||
}
|
||||
L <- matrix(L, ncol = length(k))
|
||||
boxplot(L, main = "Loss ...",
|
||||
xlab = "SDR dimension k",
|
||||
ylab = expression(L(V, X[i])),
|
||||
names = k)
|
||||
}
|
||||
|
||||
#' Prints a summary of a \code{cve} result.
|
||||
|
|
|
@ -1,17 +1,19 @@
|
|||
#' Simple implementation of the CVE method. 'Simple' means that this method is
|
||||
#' a classic GD method unsing no further tricks.
|
||||
#' Implementation of the CVE method as a Riemann Conjugated Gradient method.
|
||||
#'
|
||||
#' @references A Riemannian Conjugate Gradient Algorithm with Implicit Vector
|
||||
#' Transport for Optimization on the Stiefel Manifold
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
cve_simple <- function(X, Y, k,
|
||||
nObs = sqrt(nrow(X)),
|
||||
h = NULL,
|
||||
tau = 1.0,
|
||||
tol = 1e-3,
|
||||
slack = 0,
|
||||
epochs = 50L,
|
||||
attempts = 10L,
|
||||
logger = NULL
|
||||
cve_momentum <- function(X, Y, k,
|
||||
nObs = sqrt(nrow(X)),
|
||||
h = NULL,
|
||||
tau = 1.0,
|
||||
tol = 1e-4,
|
||||
rho = 0.1, # Momentum update.
|
||||
slack = 0,
|
||||
epochs = 50L,
|
||||
attempts = 10L,
|
||||
logger = NULL
|
||||
) {
|
||||
# Set `grad` functions environment to enable if to find this environments
|
||||
# local variabels, needed to enable the manipulation of this local variables
|
||||
|
@ -67,9 +69,6 @@ cve_simple <- function(X, Y, k,
|
|||
# Set last loss (aka, loss after applying the step).
|
||||
loss.last <- loss
|
||||
|
||||
# Cayley transform matrix `A`
|
||||
A <- (G %*% t(V)) - (V %*% t(G))
|
||||
|
||||
# Call logger with initial values before starting optimization.
|
||||
if (is.function(logger)) {
|
||||
epoch <- 0 # Set epoch count to 0 (only relevant for logging).
|
||||
|
@ -77,12 +76,15 @@ cve_simple <- function(X, Y, k,
|
|||
logger(environment())
|
||||
}
|
||||
|
||||
M <- matrix(0, p, q)
|
||||
## Start optimization loop.
|
||||
for (epoch in 1:epochs) {
|
||||
# Apply learning rate `tau`.
|
||||
A.tau <- tau * A
|
||||
A <- projTangentStiefl(V, G)
|
||||
# Momentum update.
|
||||
M <- A + rho * projTangentStiefl(V, M)
|
||||
# Parallet transport (on Stiefl manifold) into direction of `G`.
|
||||
V.tau <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)
|
||||
V.tau <- retractStiefl(V - tau * M)
|
||||
|
||||
# Loss at position after a step.
|
||||
loss <- grad(X, Y, V.tau, h, loss.only = TRUE, persistent = TRUE)
|
||||
|
@ -119,9 +121,6 @@ cve_simple <- function(X, Y, k,
|
|||
|
||||
# Compute gradient at new position.
|
||||
G <- grad(X, Y, V, h, persistent = TRUE)
|
||||
|
||||
# Cayley transform matrix `A`
|
||||
A <- (G %*% t(V)) - (V %*% t(G))
|
||||
}
|
||||
|
||||
# Check if current attempt improved previous ones
|
||||
|
@ -129,7 +128,6 @@ cve_simple <- function(X, Y, k,
|
|||
loss.best <- loss
|
||||
V.best <- V
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return(list(
|
|
@ -0,0 +1,179 @@
|
|||
#' Implementation of the CVE method as a Riemann Conjugated Gradient method.
|
||||
#'
|
||||
#' @references A Riemannian Conjugate Gradient Algorithm with Implicit Vector
|
||||
#' Transport for Optimization on the Stiefel Manifold
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
cve_rcg <- function(X, Y, k,
|
||||
nObs = sqrt(nrow(X)),
|
||||
h = NULL,
|
||||
tau = 1.0,
|
||||
tol = 1e-4,
|
||||
rho = 1e-4, # For Armijo condition.
|
||||
slack = 0,
|
||||
epochs = 50L,
|
||||
attempts = 10L,
|
||||
max.linesearch.iter = 20L,
|
||||
logger = NULL
|
||||
) {
|
||||
# Set `grad` functions environment to enable if to find this environments
|
||||
# local variabels, needed to enable the manipulation of this local variables
|
||||
# from within `grad`.
|
||||
environment(grad) <- environment()
|
||||
|
||||
# Get dimensions.
|
||||
n <- nrow(X) # Number of samples.
|
||||
p <- ncol(X) # Data dimensions
|
||||
q <- p - k # Complement dimension of the SDR space.
|
||||
|
||||
# Save initial learning rate `tau`.
|
||||
tau.init <- tau
|
||||
# Addapt tolearance for break condition.
|
||||
tol <- sqrt(2 * q) * tol
|
||||
|
||||
# Estaimate bandwidth if not given.
|
||||
if (missing(h) || !is.numeric(h)) {
|
||||
h <- estimate.bandwidth(X, k, nObs)
|
||||
}
|
||||
|
||||
# Compute persistent data.
|
||||
# Compute lookup indexes for symmetrie, lower/upper
|
||||
# triangular parts and vectorization.
|
||||
pair.index <- elem.pairs(seq(n))
|
||||
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
|
||||
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
|
||||
# Index of vectorized matrix, for lower and upper triangular part.
|
||||
lower <- ((i - 1) * n) + j
|
||||
upper <- ((j - 1) * n) + i
|
||||
|
||||
# Create all pairewise differences of rows of `X`.
|
||||
X_diff <- X[i, , drop = F] - X[j, , drop = F]
|
||||
# Identity matrix.
|
||||
I_p <- diag(1, p)
|
||||
|
||||
# Init tracking of current best (according multiple attempts).
|
||||
V.best <- NULL
|
||||
loss.best <- Inf
|
||||
|
||||
# Start loop for multiple attempts.
|
||||
for (attempt in 1:attempts) {
|
||||
# Reset learning rate `tau`.
|
||||
tau <- tau.init
|
||||
|
||||
# Sample a `(p, q)` dimensional matrix from the stiefel manifold as
|
||||
# optimization start value.
|
||||
V <- rStiefl(p, q)
|
||||
|
||||
# Initial loss and gradient.
|
||||
loss <- Inf
|
||||
G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
|
||||
# Set last loss (aka, loss after applying the step).
|
||||
loss.last <- loss
|
||||
|
||||
# Cayley transform matrix `A`
|
||||
A <- (G %*% t(V)) - (V %*% t(G))
|
||||
A.last <- A
|
||||
|
||||
W <- -A
|
||||
Z <- W %*% V
|
||||
|
||||
# Compute directional derivative.
|
||||
loss.prime <- sum(G * Z) # Tr(G^T Z)
|
||||
|
||||
# Call logger with initial values before starting optimization.
|
||||
if (is.function(logger)) {
|
||||
epoch <- 0 # Set epoch count to 0 (only relevant for logging).
|
||||
error <- NA
|
||||
logger(environment())
|
||||
}
|
||||
|
||||
## Start optimization loop.
|
||||
for (epoch in 1:epochs) {
|
||||
# New directional derivative.
|
||||
loss.prime <- sum(G * Z)
|
||||
|
||||
# Reset `tau` for step-size selection.
|
||||
tau <- tau.init
|
||||
for (iter in 1:max.linesearch.iter) {
|
||||
V.tau <- retractStiefl(V + tau * Z)
|
||||
# Loss at position after a step.
|
||||
loss <- grad(X, Y, V.tau, h,
|
||||
loss.only = TRUE, persistent = TRUE)
|
||||
# Check Armijo condition.
|
||||
if (loss <= loss.last + (rho * tau * loss.prime)) {
|
||||
break() # Iff fulfilled stop linesearch.
|
||||
}
|
||||
# Reduce step-size and continue linesearch.
|
||||
tau <- tau / 2
|
||||
}
|
||||
|
||||
# Compute error.
|
||||
error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
|
||||
|
||||
# Perform step with found step-size
|
||||
V <- V.tau
|
||||
loss.last <- loss
|
||||
|
||||
# Call logger.
|
||||
if (is.function(logger)) {
|
||||
logger(environment())
|
||||
}
|
||||
|
||||
# Check break condition.
|
||||
# Note: the devision by `sqrt(2 * k)` is included in `tol`.
|
||||
if (error < tol) {
|
||||
break()
|
||||
}
|
||||
|
||||
# Compute Gradient at new position.
|
||||
G <- grad(X, Y, V, h, persistent = TRUE)
|
||||
# Store last `A` for `beta` computation.
|
||||
A.last <- A
|
||||
# Cayley transform matrix `A`
|
||||
A <- (G %*% t(V)) - (V %*% t(G))
|
||||
|
||||
# Check 2. break condition.
|
||||
if (norm(A, type = 'F') < tol) {
|
||||
break()
|
||||
}
|
||||
|
||||
# New directional derivative.
|
||||
loss.prime <- sum(G * Z)
|
||||
|
||||
# Reset beta if needed.
|
||||
if (loss.prime < 0) {
|
||||
# Compute `beta` as describet in paper.
|
||||
beta.FR <- (norm(A, type = 'F') / norm(A.last, type = 'F'))^2
|
||||
beta.PR <- sum(A * (A - A.last)) / norm(A.last, type = 'F')^2
|
||||
if (beta.PR < -beta.FR) {
|
||||
beta <- -beta.FR
|
||||
} else if (abs(beta.PR) < beta.FR) {
|
||||
beta <- beta.PR
|
||||
} else if (beta.PR > beta.FR) {
|
||||
beta <- beta.FR
|
||||
} else {
|
||||
beta <- 0
|
||||
}
|
||||
} else {
|
||||
beta <- 0
|
||||
}
|
||||
|
||||
# Update direction.
|
||||
W <- -A + beta * W
|
||||
Z <- W %*% V
|
||||
}
|
||||
|
||||
# Check if current attempt improved previous ones
|
||||
if (loss < loss.best) {
|
||||
loss.best <- loss
|
||||
V.best <- V
|
||||
}
|
||||
}
|
||||
|
||||
return(list(
|
||||
loss = loss.best,
|
||||
V = V.best,
|
||||
B = null(V.best),
|
||||
h = h
|
||||
))
|
||||
}
|
|
@ -0,0 +1,121 @@
|
|||
#' Implementation of the CVE method as a Riemann Conjugated Gradient method.
|
||||
#'
|
||||
#' @references A Riemannian Conjugate Gradient Algorithm with Implicit Vector
|
||||
#' Transport for Optimization on the Stiefel Manifold
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
cve_rmsprob <- function(X, Y, k,
|
||||
nObs = sqrt(nrow(X)),
|
||||
h = NULL,
|
||||
tau = 0.1,
|
||||
tol = 1e-4,
|
||||
rho = 0.1, # Momentum update.
|
||||
slack = 0,
|
||||
epochs = 50L,
|
||||
attempts = 10L,
|
||||
epsilon = 1e-7,
|
||||
max.linesearch.iter = 20L,
|
||||
logger = NULL
|
||||
) {
|
||||
# Set `grad` functions environment to enable if to find this environments
|
||||
# local variabels, needed to enable the manipulation of this local variables
|
||||
# from within `grad`.
|
||||
environment(grad) <- environment()
|
||||
|
||||
# Get dimensions.
|
||||
n <- nrow(X) # Number of samples.
|
||||
p <- ncol(X) # Data dimensions
|
||||
q <- p - k # Complement dimension of the SDR space.
|
||||
|
||||
# Save initial learning rate `tau`.
|
||||
tau.init <- tau
|
||||
# Addapt tolearance for break condition.
|
||||
tol <- sqrt(2 * q) * tol
|
||||
|
||||
# Estaimate bandwidth if not given.
|
||||
if (missing(h) || !is.numeric(h)) {
|
||||
h <- estimate.bandwidth(X, k, nObs)
|
||||
}
|
||||
|
||||
# Compute persistent data.
|
||||
# Compute lookup indexes for symmetrie, lower/upper
|
||||
# triangular parts and vectorization.
|
||||
pair.index <- elem.pairs(seq(n))
|
||||
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
|
||||
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
|
||||
# Index of vectorized matrix, for lower and upper triangular part.
|
||||
lower <- ((i - 1) * n) + j
|
||||
upper <- ((j - 1) * n) + i
|
||||
|
||||
# Create all pairewise differences of rows of `X`.
|
||||
X_diff <- X[i, , drop = F] - X[j, , drop = F]
|
||||
# Identity matrix.
|
||||
I_p <- diag(1, p)
|
||||
|
||||
# Init tracking of current best (according multiple attempts).
|
||||
V.best <- NULL
|
||||
loss.best <- Inf
|
||||
|
||||
# Start loop for multiple attempts.
|
||||
for (attempt in 1:attempts) {
|
||||
# Sample a `(p, q)` dimensional matrix from the stiefel manifold as
|
||||
# optimization start value.
|
||||
V <- rStiefl(p, q)
|
||||
|
||||
# Call logger with initial values before starting optimization.
|
||||
if (is.function(logger)) {
|
||||
loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
|
||||
epoch <- 0 # Set epoch count to 0 (only relevant for logging).
|
||||
error <- NA
|
||||
logger(environment())
|
||||
}
|
||||
|
||||
M <- matrix(0, p, q)
|
||||
## Start optimization loop.
|
||||
for (epoch in 1:epochs) {
|
||||
# Compute gradient and loss at current position.
|
||||
loss <- Inf
|
||||
G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
|
||||
# Projectd Gradient.
|
||||
A <- projTangentStiefl(V, G)
|
||||
# Projected element squared gradient.
|
||||
Asq <- projTangentStiefl(V, G * G)
|
||||
# Momentum update.
|
||||
M <- (1 - rho) * Asq + rho * projTangentStiefl(V, M)
|
||||
# Parallet transport (on Stiefl manifold) into direction of `G`.
|
||||
V.tau <- retractStiefl(V - tau.init * A / (sqrt(abs(M)) + epsilon))
|
||||
|
||||
# Compute error.
|
||||
error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
|
||||
|
||||
# Perform step.
|
||||
V <- V.tau
|
||||
|
||||
# Call logger after taking a step.
|
||||
if (is.function(logger)) {
|
||||
# Set tau to an step size estimate (only for logging)
|
||||
tau <- tau.init / mean(sqrt(abs(M)) + epsilon)
|
||||
logger(environment())
|
||||
}
|
||||
|
||||
# Check break condition.
|
||||
# Note: the devision by `sqrt(2 * k)` is included in `tol`.
|
||||
if (error < tol) {
|
||||
break()
|
||||
}
|
||||
}
|
||||
|
||||
# Check if current attempt improved previous ones
|
||||
if (loss < loss.best) {
|
||||
loss.best <- loss
|
||||
V.best <- V
|
||||
}
|
||||
}
|
||||
|
||||
return(list(
|
||||
loss = loss.best,
|
||||
V = V.best,
|
||||
B = null(V.best),
|
||||
h = h
|
||||
))
|
||||
}
|
|
@ -3,14 +3,16 @@
|
|||
#'
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
cve_sgd <- function(X, Y, k,
|
||||
cve_sgdrmsprob <- function(X, Y, k,
|
||||
nObs = sqrt(nrow(X)),
|
||||
h = NULL,
|
||||
tau = 0.01,
|
||||
tol = 1e-3,
|
||||
tau = 0.1,
|
||||
tol = 1e-4,
|
||||
rho = 0.1,
|
||||
epochs = 50L,
|
||||
batch.size = 16L,
|
||||
attempts = 10L,
|
||||
epsilon = 1e-7,
|
||||
logger = NULL
|
||||
) {
|
||||
# Set `grad` functions environment to enable if to find this environments
|
||||
|
@ -72,6 +74,7 @@ cve_sgd <- function(X, Y, k,
|
|||
logger(environment())
|
||||
}
|
||||
|
||||
M <- matrix(0, p, q)
|
||||
# Repeat `epochs` times
|
||||
for (epoch in 1:epochs) {
|
||||
# Shuffle batches
|
||||
|
@ -87,13 +90,14 @@ cve_sgd <- function(X, Y, k,
|
|||
loss <- NULL
|
||||
G <- grad(X[batch, ], Y[batch], V, h, loss.out = TRUE)
|
||||
|
||||
# Cayley transform matrix.
|
||||
A <- (G %*% t(V)) - (V %*% t(G))
|
||||
|
||||
# Apply learning rate `tau`.
|
||||
A.tau <- tau * A
|
||||
# Projectd Gradient.
|
||||
A <- projTangentStiefl(V, G)
|
||||
# Projected element squared gradient.
|
||||
Asq <- projTangentStiefl(V, G * G)
|
||||
# Momentum update.
|
||||
M <- (1 - rho) * Asq + rho * projTangentStiefl(V, M)
|
||||
# Parallet transport (on Stiefl manifold) into direction of `G`.
|
||||
V <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)
|
||||
V <- retractStiefl(V - tau.init * A / (sqrt(abs(M)) + epsilon))
|
||||
}
|
||||
# And the error for the history.
|
||||
error <- norm(V.last %*% t(V.last) - V %*% t(V), type = "F")
|
|
@ -72,9 +72,7 @@ cve_simple <- function(X, Y, k,
|
|||
|
||||
# Call logger with initial values before starting optimization.
|
||||
if (is.function(logger)) {
|
||||
epoch <- 0 # Set epoch count to 0 (only relevant for logging).
|
||||
error <- NA
|
||||
logger(environment())
|
||||
logger(0L, attempt, loss, V, tau)
|
||||
}
|
||||
|
||||
## Start optimization loop.
|
||||
|
@ -103,7 +101,7 @@ cve_simple <- function(X, Y, k,
|
|||
V <- V.tau
|
||||
# Call logger last time befor stoping.
|
||||
if (is.function(logger)) {
|
||||
logger(environment())
|
||||
logger(epoch, attempt, loss, V, tau)
|
||||
}
|
||||
break()
|
||||
}
|
||||
|
@ -114,7 +112,7 @@ cve_simple <- function(X, Y, k,
|
|||
|
||||
# Call logger after taking a step.
|
||||
if (is.function(logger)) {
|
||||
logger(environment())
|
||||
logger(epoch, attempt, loss, V, tau)
|
||||
}
|
||||
|
||||
# Compute gradient at new position.
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
#' Samples uniform from the Stiefel Manifold
|
||||
#' Samples uniform from the Stiefl Manifold.
|
||||
#'
|
||||
#' @param p row dim.
|
||||
#' @param q col dim.
|
||||
|
@ -10,6 +10,48 @@ rStiefl <- function(p, q) {
|
|||
return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))))
|
||||
}
|
||||
|
||||
#' Retraction to the manifold.
|
||||
#'
|
||||
#' @param A matrix.
|
||||
#' @return `(p, q)` semi-orthogonal matrix, aka element of the Stiefl manifold.
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
retractStiefl <- function(A) {
|
||||
return(qr.Q(qr(A)))
|
||||
}
|
||||
|
||||
#' Skew-Symmetric matrix computed from `A` as
|
||||
#' \eqn{1/2 (A - A^T)}.
|
||||
#' @param A Matrix of dim `(p, q)`
|
||||
#' @return Skew-Symmetric matrix of dim `(p, p)`.
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
skew <- function(A) {
|
||||
0.5 * (A - t(A))
|
||||
}
|
||||
|
||||
#' Symmetric matrix computed from `A` as
|
||||
#' \eqn{1/2 (A + A^T)}.
|
||||
#' @param A Matrix of dim `(p, q)`
|
||||
#' @return Symmetric matrix of dim `(p, p)`.
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
sym <- function(A) {
|
||||
0.5 * (A + t(A))
|
||||
}
|
||||
|
||||
#' Orthogonal Projection onto the tangent space of the stiefl manifold.
|
||||
#'
|
||||
#' @param V Point on the stiefl manifold.
|
||||
#' @param G matrix to be projected onto the tangent space at `V`.
|
||||
#' @return `(p, q)` matrix as element of the tangent space at `V`.
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
projTangentStiefl <- function(V, G) {
|
||||
Q <- diag(1, nrow(V)) - V %*% t(V)
|
||||
return(Q %*% G + V %*% skew(t(V) %*% G))
|
||||
}
|
||||
|
||||
#' Null space basis of given matrix `V`
|
||||
#'
|
||||
#' @param V `(p, q)` matrix
|
||||
|
|
|
@ -0,0 +1,113 @@
|
|||
|
||||
#C:\Users\Lukas\Desktop\owncloud\Shared\Lukas\CVE
|
||||
install.packages("C:/Users/Lukas/Desktop/owncloud/Shared/Lukas/CVE_1.0.tar.gz", repos=NULL, type="source")
|
||||
install.packages(file.choose(), repos=NULL, type="source")
|
||||
|
||||
dim<-12
|
||||
N<-100
|
||||
s<-0.5
|
||||
dat<-creat_sample(rep(1,dim)/sqrt(dim),N,fsquare,0.5)
|
||||
test<-cve(Y~.,data=as.data.frame(dat),k=1)
|
||||
##############
|
||||
#initialize model parameterss
|
||||
m<-100 #number of replications in simulation
|
||||
dim<-12 #dimension of random variable X
|
||||
truedim<-2 #dimension of B=b
|
||||
qs<-dim-truedim # dimension of orthogonal complement of B
|
||||
b1=c(1,1,1,1,1,1,0,0,0,0,0,0)/sqrt(6)
|
||||
b2=c(1,-1,1,-1,1,-1,0,0,0,0,0,0)/sqrt(6)
|
||||
b<-cbind(b1,b2)
|
||||
#b<-b1
|
||||
P<-b%*%t(b)
|
||||
sigma=0.5 #error standard deviation
|
||||
N<-70 #sample size
|
||||
K<-30 #number of arbitrary starting values for curvilinear optimization
|
||||
MAXIT<-30 #maximal number of iterations in curvilinear search algorithm
|
||||
var_vec<-mat.or.vec(m,12)
|
||||
M1_weight<-mat.or.vec(m,13)
|
||||
#colnames(M1_weight)<-c('CVE1','CVE2','CVE3','CVE1_Rcpp','CVE2_Rcpp','CVE3_Rcpp','meanMAVE','csMAVE','phd','sir','save','CVE4')
|
||||
#link function for M1
|
||||
fM1<-function(x){return(x[1]/(0.5+(x[2]+1.5)^2))}
|
||||
for (i in 1:m){
|
||||
#generate dat according to M1
|
||||
dat<-creat_sample_nor_nonstand(b,N,fsquare,diag(rep(1,dim)),sigma)
|
||||
#est sample covariance matrix
|
||||
Sig_est<-est_varmat(dat[,-1])
|
||||
#est trace of sample covariance matrix
|
||||
tr<-var_tr(Sig_est)
|
||||
|
||||
#calculates Vhat_k for CVE1,CVE2, CVE3 for k=qs
|
||||
CVE1<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||
CVE2<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||
CVE3<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||
|
||||
CVE4<-stiefl_weight_partial_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||
CVE5<-stiefl_weight_partial_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||
CVE6<-stiefl_weight_partial_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||
#
|
||||
CVE7<-stiefl_weight_full_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||
CVE8<-stiefl_weight_full_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||
CVE9<-stiefl_weight_full_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||
#
|
||||
#
|
||||
var_vec[i,1]<-CVE1$var
|
||||
var_vec[i,2]<-CVE2$var
|
||||
var_vec[i,3]<-CVE3$var
|
||||
|
||||
var_vec[i,4]<-CVE4$var
|
||||
var_vec[i,5]<-CVE5$var
|
||||
var_vec[i,6]<-CVE6$var
|
||||
#
|
||||
var_vec[i,7]<-CVE7$var
|
||||
var_vec[i,8]<-CVE8$var
|
||||
var_vec[i,9]<-CVE9$var
|
||||
|
||||
CVE1$est_base<-fill_base(CVE1$est_base)
|
||||
CVE2$est_base<-fill_base(CVE2$est_base)
|
||||
CVE3$est_base<-fill_base(CVE3$est_base)
|
||||
CVE4$est_base<-fill_base(CVE4$est_base)
|
||||
CVE5$est_base<-fill_base(CVE5$est_base)
|
||||
CVE6$est_base<-fill_base(CVE6$est_base)
|
||||
CVE7$est_base<-fill_base(CVE7$est_base)
|
||||
CVE8$est_base<-fill_base(CVE8$est_base)
|
||||
CVE9$est_base<-fill_base(CVE9$est_base)
|
||||
|
||||
# calculate distance between true B and estimated B
|
||||
M1_weight[i,1]<-subspace_dist(CVE1$est_base[,1:truedim],b)
|
||||
M1_weight[i,2]<-subspace_dist(CVE2$est_base[,1:truedim],b)
|
||||
M1_weight[i,3]<-subspace_dist(CVE3$est_base[,1:truedim],b)
|
||||
M1_weight[i,4]<-subspace_dist(CVE4$est_base[,1:truedim],b)
|
||||
M1_weight[i,5]<-subspace_dist(CVE5$est_base[,1:truedim],b)
|
||||
M1_weight[i,6]<-subspace_dist(CVE6$est_base[,1:truedim],b)
|
||||
M1_weight[i,7]<-subspace_dist(CVE7$est_base[,1:truedim],b)
|
||||
M1_weight[i,8]<-subspace_dist(CVE8$est_base[,1:truedim],b)
|
||||
M1_weight[i,9]<-subspace_dist(CVE9$est_base[,1:truedim],b)
|
||||
|
||||
|
||||
|
||||
CVE1_Rcpp<-cve(Y~.,data=as.data.frame(dat),k=truedim,nObs=N^0.8,attempts=K,tol=10^(-3),slack=0)[[2]]
|
||||
CVE2_Rcpp<-cve(Y~.,data=as.data.frame(dat),k=truedim,nObs=N^(2/3),attempts=K,tol=10^(-3),slack=0)[[2]]
|
||||
CVE3_Rcpp<-cve(Y~.,data=as.data.frame(dat),k=truedim,nObs=N^0.5,attempts=K,tol=10^(-3),slack=0)[[2]]
|
||||
|
||||
# CVE4_Rcpp<-cve(Y~.,data=as.data.frame(dat),k=truedim,h=h_opt,attempts=K,tol=10^(-3))[[2]]
|
||||
#M1_Rcpp[i,12]<-subspace_dist(CVE4_Rcpp$B,b)
|
||||
|
||||
var_vec[i,10]<-CVE1_Rcpp$loss
|
||||
var_vec[i,11]<-CVE2_Rcpp$loss
|
||||
var_vec[i,12]<-CVE3_Rcpp$loss
|
||||
|
||||
#calculate orthogonal complement of Vhat_k
|
||||
#i.e. CVE1$est_base[,1:truedim] is estimator for B with dimension (dim times (dim-qs))
|
||||
|
||||
|
||||
M1_weight[i,10]<-subspace_dist(CVE1_Rcpp$B,b)
|
||||
M1_weight[i,11]<-subspace_dist(CVE2_Rcpp$B,b)
|
||||
M1_weight[i,12]<-subspace_dist(CVE3_Rcpp$B,b)
|
||||
#meanMAVE
|
||||
mod_t2<-mave(Y~.,data=as.data.frame(dat),method = 'meanMAVE')
|
||||
M1_weight[i,13]<-subspace_dist(mod_t2$dir[[truedim]],b)
|
||||
|
||||
print(paste(i,paste('/',m)))
|
||||
}
|
||||
boxplot(M1_weight[1:(i-1),]/sqrt(2*truedim),names=colnames(M1_weight),ylab='err',main='M1')
|
||||
summary(M1_weight[1:(i-1),])
|
|
@ -0,0 +1,222 @@
|
|||
LV_weight_partial<-function(V,Xl,dtemp,h,q,Y,grad=T){
|
||||
N<-length(Y)
|
||||
if(is.vector(V)){k<-1}
|
||||
else{k<-length(V[1,])}
|
||||
Xlv<-Xl%*%V
|
||||
d<-dtemp-((Xlv^2)%*%rep(1,k))
|
||||
w<-exp(-0.5*(d/h)^2)#dnorm(d/h)/dnorm(0)
|
||||
w<-matrix(w,N,q)
|
||||
wn<-apply(w,2,sum)#new
|
||||
w<-apply(w,2,column_normalize)
|
||||
mY<-t(w)%*%Y
|
||||
sig<-t(w)%*%(Y^2)-(mY)^2
|
||||
W<-diag(kronecker(t(wn),rep(1,N)))##new
|
||||
if(grad==T){
|
||||
grad<-mat.or.vec(dim,k)
|
||||
tmp1<-(kronecker(sig,rep(1,N))-(as.vector(kronecker(rep(1,q),Y))-kronecker(mY,rep(1,N)))^2)
|
||||
if(k==1){
|
||||
grad_d<- -2*Xl*as.vector(Xlv)
|
||||
grad<-(1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1 #new
|
||||
# wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N))
|
||||
# grad<- wn_grad%*%(sig-rep(var1[2],q))/(sum(wn))+grad
|
||||
}
|
||||
else{
|
||||
for (j in 1:(k)){
|
||||
grad_d<- -2*Xl*as.vector(Xlv[,j])
|
||||
grad[,j]<- (1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1#new
|
||||
# wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N))
|
||||
# grad[,j]<- wn_grad%*%(sig-rep(var1[2],q))/(sum(wn))+grad
|
||||
}
|
||||
}
|
||||
ret<-list(t(wn)%*%sig/sum(wn),sig,grad)#new
|
||||
names(ret)<-c('var','sig','grad')
|
||||
}
|
||||
else{
|
||||
ret<-list(t(wn)%*%sig/sum(wn),sig)#new
|
||||
names(ret)<-c('var','sig')
|
||||
}
|
||||
|
||||
return(ret)
|
||||
}
|
||||
######
|
||||
LV_weight_full<-function(V,Xl,dtemp,h,q,Y,grad=T){
|
||||
N<-length(Y)
|
||||
if(is.vector(V)){k<-1}
|
||||
else{k<-length(V[1,])}
|
||||
Xlv<-Xl%*%V
|
||||
d<-dtemp-((Xlv^2)%*%rep(1,k))
|
||||
w<-exp(-0.5*(d/h)^2)#dnorm(d/h)/dnorm(0)
|
||||
w<-matrix(w,N,q)
|
||||
wn<-apply(w,2,sum)#new
|
||||
w<-apply(w,2,column_normalize)
|
||||
mY<-t(w)%*%Y
|
||||
sig<-t(w)%*%(Y^2)-(mY)^2
|
||||
W<-(kronecker(t(wn),rep(1,N)))#new or ###diag(kronecker(t(wn),rep(1,N)))
|
||||
var<-t(wn)%*%sig/sum(wn)#new
|
||||
if(grad==T){
|
||||
grad<-mat.or.vec(dim,k)
|
||||
tmp1<-(kronecker(sig,rep(1,N))-(as.vector(kronecker(rep(1,q),Y))-kronecker(mY,rep(1,N)))^2)
|
||||
if(k==1){
|
||||
grad_d<- -2*Xl*as.vector(Xlv)
|
||||
grad<-(1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1#new
|
||||
wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N))#new
|
||||
grad<- wn_grad%*%(sig-rep(var,q))/(sum(wn))+grad#new
|
||||
}
|
||||
else{
|
||||
for (j in 1:(k)){
|
||||
grad_d<- -2*Xl*as.vector(Xlv[,j])
|
||||
grad[,j]<- (1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1#new
|
||||
wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N))#new
|
||||
grad[,j]<- wn_grad%*%(sig-rep(var,q))/(sum(wn))+grad[,j]#new
|
||||
}
|
||||
}
|
||||
ret<-list(var,sig,grad)
|
||||
names(ret)<-c('var','sig','grad')
|
||||
}
|
||||
else{
|
||||
ret<-list(var,sig)
|
||||
names(ret)<-c('var','sig')
|
||||
}
|
||||
|
||||
return(ret)
|
||||
}
|
||||
####
|
||||
stiefl_weight_partial_opt<-function(dat,h=NULL,k,k0=30,p=1,maxit=50,nObs=sqrt(length(dat[,1])),lambda_0=1,tol=10^(-3),sclack_para=0){
|
||||
Y<-dat[,1]
|
||||
X<-dat[,-1]
|
||||
N<-length(Y)
|
||||
dim<-length(X[1,])
|
||||
if(p<1){
|
||||
S<-est_varmat(X)
|
||||
tmp1<-q_ind(X,S,p)
|
||||
q<-tmp1$q
|
||||
ind<-tmp1$ind
|
||||
}
|
||||
else{
|
||||
q<-N
|
||||
ind<-1:N
|
||||
}
|
||||
Xl<-(kronecker(rep(1,q),X)-kronecker(X[ind,],rep(1,N)))
|
||||
dtemp<-apply(Xl,1,norm2)
|
||||
if(is.null(h)){
|
||||
S<-est_varmat(X)
|
||||
tr<-var_tr(S)
|
||||
h<-choose_h_2(dim,k,N,nObs,tr)
|
||||
}
|
||||
best<-exp(10000)
|
||||
Vend<-mat.or.vec(dim,k)
|
||||
sig<-mat.or.vec(q,1)
|
||||
for(u in 1:k0){
|
||||
Vnew<-Vold<-stiefl_startval(dim,k)
|
||||
#print(Vold)
|
||||
#print(LV(Vold,Xl,dtemp,h,q,Y)$var)
|
||||
Lnew<-Lold<-exp(10000)
|
||||
lambda<-lambda_0
|
||||
err<-10
|
||||
count<-0
|
||||
count2<-0
|
||||
while(err>tol&count<maxit){
|
||||
#print(Vold)
|
||||
tmp2<-LV_weight_partial(Vold,Xl,dtemp,h,q,Y)
|
||||
G<-tmp2$grad
|
||||
Lold<-tmp2$var
|
||||
W<-G%*%t(Vold)-Vold%*%t(G)
|
||||
stepsize<-lambda#/(2*sqrt(count+1))
|
||||
Vnew<-solve(diag(1,dim)+stepsize*W)%*%(diag(1,dim)-stepsize*W)%*%Vold
|
||||
# print(Vnew)
|
||||
tmp3<-LV_weight_partial(Vnew,Xl,dtemp,h,q,Y,grad=F)
|
||||
Lnew<-tmp3$var
|
||||
err<-sqrt(sum((Vold%*%t(Vold)-Vnew%*%t(Vnew))^2))/sqrt(2*k)#sqrt(sum(tmp3$grad^2))/(dim*k)#
|
||||
#print(err)
|
||||
if(((Lnew-Lold)/Lold) > sclack_para){#/(count+1)^(0.5)
|
||||
lambda=lambda/2
|
||||
err<-10
|
||||
count2<-count2+1
|
||||
count<-count-1
|
||||
Vnew<-Vold #!!!!!
|
||||
|
||||
}
|
||||
Vold<-Vnew
|
||||
count<-count+1
|
||||
#print(count)
|
||||
}
|
||||
if(best>Lnew){
|
||||
best<-Lnew
|
||||
Vend<-Vnew
|
||||
sig<-tmp3$sig
|
||||
}
|
||||
}
|
||||
ret<-list(Vend,best,sig,count,h,count2)
|
||||
names(ret)<-c('est_base','var','aov_dat','count','h','count2')
|
||||
return(ret)
|
||||
}
|
||||
###########
|
||||
stiefl_weight_full_opt<-function(dat,h=NULL,k,k0=30,p=1,maxit=50,nObs=sqrt(length(dat[,1])),lambda_0=1,tol=10^(-3),sclack_para=0){
|
||||
Y<-dat[,1]
|
||||
X<-dat[,-1]
|
||||
N<-length(Y)
|
||||
dim<-length(X[1,])
|
||||
if(p<1){
|
||||
S<-est_varmat(X)
|
||||
tmp1<-q_ind(X,S,p)
|
||||
q<-tmp1$q
|
||||
ind<-tmp1$ind
|
||||
}
|
||||
else{
|
||||
q<-N
|
||||
ind<-1:N
|
||||
}
|
||||
Xl<-(kronecker(rep(1,q),X)-kronecker(X[ind,],rep(1,N)))
|
||||
dtemp<-apply(Xl,1,norm2)
|
||||
if(is.null(h)){
|
||||
S<-est_varmat(X)
|
||||
tr<-var_tr(S)
|
||||
h<-choose_h_2(dim,k,N,nObs,tr)
|
||||
}
|
||||
best<-exp(10000)
|
||||
Vend<-mat.or.vec(dim,k)
|
||||
sig<-mat.or.vec(q,1)
|
||||
for(u in 1:k0){
|
||||
Vnew<-Vold<-stiefl_startval(dim,k)
|
||||
#print(Vold)
|
||||
#print(LV(Vold,Xl,dtemp,h,q,Y)$var)
|
||||
Lnew<-Lold<-exp(10000)
|
||||
lambda<-lambda_0
|
||||
err<-10
|
||||
count<-0
|
||||
count2<-0
|
||||
while(err>tol&count<maxit){
|
||||
#print(Vold)
|
||||
tmp2<-LV_weight_full(Vold,Xl,dtemp,h,q,Y)
|
||||
G<-tmp2$grad
|
||||
Lold<-tmp2$var
|
||||
W<-G%*%t(Vold)-Vold%*%t(G)
|
||||
stepsize<-lambda#/(2*sqrt(count+1))
|
||||
Vnew<-solve(diag(1,dim)+stepsize*W)%*%(diag(1,dim)-stepsize*W)%*%Vold
|
||||
# print(Vnew)
|
||||
tmp3<-LV_weight_full(Vnew,Xl,dtemp,h,q,Y,grad=F)
|
||||
Lnew<-tmp3$var
|
||||
err<-sqrt(sum((Vold%*%t(Vold)-Vnew%*%t(Vnew))^2))/sqrt(2*k)#sqrt(sum(tmp3$grad^2))/(dim*k)#
|
||||
#print(err)
|
||||
if(((Lnew-Lold)/Lold) > sclack_para){#/(count+1)^(0.5)
|
||||
lambda=lambda/2
|
||||
err<-10
|
||||
count2<-count2+1
|
||||
count<-count-1
|
||||
Vnew<-Vold #!!!!!
|
||||
|
||||
}
|
||||
Vold<-Vnew
|
||||
count<-count+1
|
||||
#print(count)
|
||||
}
|
||||
if(best>Lnew){
|
||||
best<-Lnew
|
||||
Vend<-Vnew
|
||||
sig<-tmp3$sig
|
||||
}
|
||||
}
|
||||
ret<-list(Vend,best,sig,count,h,count2)
|
||||
names(ret)<-c('est_base','var','aov_dat','count','h','count2')
|
||||
return(ret)
|
||||
}
|
|
@ -0,0 +1,46 @@
|
|||
\documentclass[12pt,a4paper]{article}
|
||||
|
||||
\usepackage[utf8]{inputenc}
|
||||
\usepackage[T1]{fontenc}
|
||||
\usepackage{amsmath, amsfonts, amssymb, amsthm}
|
||||
\usepackage{fullpage}
|
||||
|
||||
\newcommand{\vecl}{\ensuremath{\operatorname{vec}_l}}
|
||||
\newcommand{\Sym}{\ensuremath{\operatorname{Sym}}}
|
||||
|
||||
\begin{document}
|
||||
|
||||
Indexing a given matrix $A = (a_{ij})_{i,j = 1, ..., n} \in \mathbb{R}^{n\times n}$ given as
|
||||
\begin{displaymath}
|
||||
A = \begin{pmatrix}
|
||||
a_{0,0} & a_{0,1} & a_{0,2} & \ldots & a_{0,n-1} \\
|
||||
a_{1,0} & a_{1,1} & a_{1,2} & \ldots & a_{1,n-1} \\
|
||||
a_{2,0} & a_{2,1} & a_{2,2} & \ldots & a_{2,n-1} \\
|
||||
\vdots & \vdots & \vdots & \ddots & \vdots \\
|
||||
a_{n-1,0} & a_{n-1,1} & a_{n-1,2} & \ldots & a_{n-1,n-1}
|
||||
\end{pmatrix}
|
||||
\end{displaymath}
|
||||
|
||||
A symmetric matrix with zero main diagonal, meaning a matrix $S = S^T$ with $S_{i,i} = 0,\ \forall i = 1,..,n$ is givne in the following form
|
||||
\begin{displaymath}
|
||||
S = \begin{pmatrix}
|
||||
0 & s_{1,0} & s_{2,0} & \ldots & s_{n-1,0} \\
|
||||
s_{1,0} & 0 & s_{2,1} & \ldots & s_{n-1,1} \\
|
||||
s_{2,0} & s_{2,1} & 0 & \ldots & s_{n-1,2} \\
|
||||
\vdots & \vdots & \vdots & \ddots & \vdots \\
|
||||
s_{n-1,0} & s_{n-1,1} & s_{n-1,2} & \ldots & 0
|
||||
\end{pmatrix}
|
||||
\end{displaymath}
|
||||
Therefore its sufficient to store only the lower triangular part, for memory efficiency and some further alrogithmic shortcuts (sometime they are more expencife) the symmetric matrix $S$ is stored in packed form, meanin in a vector of the length $\frac{n(n-1)}{2}$. We use (like for matrices) a column-major order of elements and define the $\vecl:\Sym(n)\to \mathbb{R}^{n(n-1) / 2}$ opperator defined as
|
||||
|
||||
\begin{displaymath}
|
||||
\vecl(S) = (s_{1,0}, s_{2,0},\cdots,s_{n-1,0},s_{2,1}\cdots,s_{n-1,n-2})^T
|
||||
\end{displaymath}
|
||||
|
||||
The relation between the matrix indices $i,j$ and the $\vecl$ index $k$ is given by
|
||||
|
||||
\begin{displaymath}
|
||||
(\vecl(S)_k = s_{i,j} \quad\Leftrightarrow\quad k = jn+i) : j \in \{0,...,n-2\} \land j < i < n.
|
||||
\end{displaymath}
|
||||
|
||||
\end{document}
|
Loading…
Reference in New Issue