add: cve_linesearch,
fix: cleaned and optimized gradient, add. notes
This commit is contained in:
parent
1c120ec67c
commit
7d4d01a9a7
|
@ -6,6 +6,7 @@ export(col.pair.apply)
|
|||
export(cve)
|
||||
export(cve.call)
|
||||
export(cve.grid.search)
|
||||
export(cve_linesearch)
|
||||
export(cve_sgd)
|
||||
export(cve_simple)
|
||||
export(dataset)
|
||||
|
|
|
@ -94,6 +94,8 @@ cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5, k, ...) {
|
|||
method <- tolower(method)
|
||||
if (method == 'simple') {
|
||||
dr <- cve_simple(X, Y, k, nObs = nObs, ...)
|
||||
} else if (method == 'linesearch') {
|
||||
dr <- cve_linesearch(X, Y, k, nObs = nObs, ...)
|
||||
} else if (method == 'sgd') {
|
||||
dr <- cve_sgd(X, Y, k, nObs = nObs, ...)
|
||||
} else {
|
||||
|
|
|
@ -0,0 +1,163 @@
|
|||
#' Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe
|
||||
#' conditions.
|
||||
#'
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
cve_linesearch <- function(X, Y, k,
|
||||
nObs = sqrt(nrow(X)),
|
||||
h = NULL,
|
||||
tau = 1.0,
|
||||
tol = 1e-3,
|
||||
rho1 = 0.1,
|
||||
rho2 = 0.9,
|
||||
slack = 0,
|
||||
epochs = 50L,
|
||||
attempts = 10L,
|
||||
max.linesearch.iter = 10L
|
||||
) {
|
||||
# Set `grad` functions environment to enable if to find this environments
|
||||
# local variabels, needed to enable the manipulation of this local variables
|
||||
# from within `grad`.
|
||||
environment(grad) <- environment()
|
||||
|
||||
# Setup histories.
|
||||
loss.history <- matrix(NA, epochs, attempts)
|
||||
error.history <- matrix(NA, epochs, attempts)
|
||||
tau.history <- matrix(NA, epochs, attempts)
|
||||
|
||||
# Get dimensions.
|
||||
n <- nrow(X)
|
||||
p <- ncol(X)
|
||||
q <- p - k
|
||||
|
||||
# Save initial learning rate `tau`.
|
||||
tau.init <- tau
|
||||
# Addapt tolearance for break condition.
|
||||
tol <- sqrt(2 * q) * tol
|
||||
|
||||
# Estaimate bandwidth if not given.
|
||||
if (missing(h) | !is.numeric(h)) {
|
||||
h <- estimate.bandwidth(X, k, nObs)
|
||||
}
|
||||
|
||||
# Compute persistent data.
|
||||
# Compute lookup indexes for symmetrie, lower/upper
|
||||
# triangular parts and vectorization.
|
||||
pair.index <- elem.pairs(seq(n))
|
||||
i <- pair.index[, 1] # `i` indices of `(i, j)` pairs
|
||||
j <- pair.index[, 2] # `j` indices of `(i, j)` pairs
|
||||
# Matrix of vectorized indices. (vec(index) -> seq)
|
||||
index <- matrix(seq(n * n), n, n)
|
||||
lower <- index[lower.tri(index)]
|
||||
upper <- t(index)[lower]
|
||||
|
||||
# Create all pairewise differences of rows of `X`.
|
||||
X_diff <- X[i, , drop = F] - X[j, , drop = F]
|
||||
# Identity matrix.
|
||||
I_p <- diag(1, p)
|
||||
|
||||
# Init tracking of current best (according multiple attempts).
|
||||
V.best <- NULL
|
||||
loss.best <- Inf
|
||||
|
||||
# Start loop for multiple attempts.
|
||||
for (attempt in 1:attempts) {
|
||||
|
||||
# Sample a `(p, q)` dimensional matrix from the stiefel manifold as
|
||||
# optimization start value.
|
||||
V <- rStiefl(p, q)
|
||||
|
||||
# Initial loss and gradient.
|
||||
loss <- Inf
|
||||
G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
|
||||
# Set last loss (aka, loss after applying the step).
|
||||
loss.last <- loss
|
||||
|
||||
## Start optimization loop.
|
||||
for (epoch in 1:epochs) {
|
||||
|
||||
# Cayley transform matrix `A`
|
||||
A <- (G %*% t(V)) - (V %*% t(G))
|
||||
|
||||
# Directional derivative of the loss at current position, given
|
||||
# as `Tr(G^T \cdot A \cdot V)`.
|
||||
loss.prime <- -0.5 * norm(A, type = 'F')^2
|
||||
|
||||
# Linesearch
|
||||
tau.upper <- Inf
|
||||
tau.lower <- 0
|
||||
tau <- tau.init
|
||||
for (iter in 1:max.linesearch.iter) {
|
||||
# Apply learning rate `tau`.
|
||||
A.tau <- (tau / 2) * A
|
||||
# Parallet transport (on Stiefl manifold) into direction of `G`.
|
||||
inv <- solve(I_p + A.tau)
|
||||
V.tau <- inv %*% ((I_p - A.tau) %*% V)
|
||||
|
||||
# Loss at position after a step.
|
||||
loss <- Inf # aka loss.tau
|
||||
G.tau <- grad(X, Y, V.tau, h, loss.out = TRUE, persistent = TRUE)
|
||||
|
||||
# Armijo condition.
|
||||
if (loss > loss.last + (rho1 * tau * loss.prime)) {
|
||||
tau.upper <- tau
|
||||
tau <- (tau.lower + tau.upper) / 2
|
||||
next()
|
||||
}
|
||||
|
||||
V.prime.tau <- -0.5 * inv %*% A %*% (V + V.tau)
|
||||
loss.prime.tau <- sum(G * V.prime.tau) # Tr(grad(tau)^T \cdot Y^'(tau))
|
||||
|
||||
# Wolfe condition.
|
||||
if (loss.prime.tau < rho2 * loss.prime) {
|
||||
tau.lower <- tau
|
||||
if (tau.upper == Inf) {
|
||||
tau <- 2 * tau.lower
|
||||
} else {
|
||||
tau <- (tau.lower + tau.upper) / 2
|
||||
}
|
||||
} else {
|
||||
break()
|
||||
}
|
||||
}
|
||||
|
||||
# Compute error.
|
||||
error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
|
||||
|
||||
# Write history.
|
||||
loss.history[epoch, attempt] <- loss
|
||||
error.history[epoch, attempt] <- error
|
||||
tau.history[epoch, attempt] <- tau
|
||||
|
||||
# Check break condition (epoch check to skip ignored gradient calc).
|
||||
# Note: the devision by `sqrt(2 * k)` is included in `tol`.
|
||||
if (error < tol | epoch >= epochs) {
|
||||
# take last step and stop optimization.
|
||||
V <- V.tau
|
||||
break()
|
||||
}
|
||||
|
||||
# Perform the step and remember previous loss.
|
||||
V <- V.tau
|
||||
loss.last <- loss
|
||||
G <- G.tau
|
||||
}
|
||||
|
||||
# Check if current attempt improved previous ones
|
||||
if (loss < loss.best) {
|
||||
loss.best <- loss
|
||||
V.best <- V
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return(list(
|
||||
loss.history = loss.history,
|
||||
error.history = error.history,
|
||||
tau.history = tau.history,
|
||||
loss = loss.best,
|
||||
V = V.best,
|
||||
B = null(V.best),
|
||||
h = h
|
||||
))
|
||||
}
|
|
@ -7,10 +7,20 @@ cve_sgd <- function(X, Y, k,
|
|||
nObs = sqrt(nrow(X)),
|
||||
h = NULL,
|
||||
tau = 0.01,
|
||||
tol = 1e-3,
|
||||
epochs = 50L,
|
||||
batch.size = 16L,
|
||||
attempts = 10L
|
||||
) {
|
||||
# Set `grad` functions environment to enable if to find this environments
|
||||
# local variabels, needed to enable the manipulation of this local variables
|
||||
# from within `grad`.
|
||||
environment(grad) <- environment()
|
||||
|
||||
# Setup histories.
|
||||
loss.history <- matrix(NA, epochs, attempts)
|
||||
error.history <- matrix(NA, epochs, attempts)
|
||||
|
||||
# Get dimensions.
|
||||
n <- nrow(X) # Number of samples.
|
||||
p <- ncol(X) # Data dimensions
|
||||
|
@ -18,15 +28,31 @@ cve_sgd <- function(X, Y, k,
|
|||
|
||||
# Save initial learning rate `tau`.
|
||||
tau.init <- tau
|
||||
# Addapt tolearance for break condition.
|
||||
tol <- sqrt(2 * q) * tol
|
||||
|
||||
# Estaimate bandwidth if not given.
|
||||
if (missing(h) | !is.numeric(h)) {
|
||||
h <- estimate.bandwidth(X, k, nObs)
|
||||
}
|
||||
|
||||
# Compute persistent data.
|
||||
# Compute lookup indexes for symmetrie, lower/upper
|
||||
# triangular parts and vectorization.
|
||||
pair.index <- elem.pairs(seq(n))
|
||||
i <- pair.index[, 1] # `i` indices of `(i, j)` pairs
|
||||
j <- pair.index[, 2] # `j` indices of `(i, j)` pairs
|
||||
# Matrix of vectorized indices. (vec(index) -> seq)
|
||||
index <- matrix(seq(n * n), n, n)
|
||||
lower <- index[lower.tri(index)]
|
||||
upper <- t(index)[lower]
|
||||
|
||||
# Create all pairewise differences of rows of `X`.
|
||||
X_diff <- X[i, , drop = F] - X[j, , drop = F]
|
||||
# Identity matrix.
|
||||
I_p <- diag(1, p)
|
||||
# Init a list of data indices (shuffled for batching).
|
||||
indices <- seq(n)
|
||||
I_p <- diag(1, p)
|
||||
|
||||
# Init tracking of current best (according multiple attempts).
|
||||
V.best <- NULL
|
||||
|
@ -40,6 +66,8 @@ cve_sgd <- function(X, Y, k,
|
|||
# Sample a `(p, q)` dimensional matrix from the stiefel manifold as
|
||||
# optimization start value.
|
||||
V <- rStiefl(p, q)
|
||||
# Keep track of last `V` for computing error after an epoch.
|
||||
V.last <- V
|
||||
|
||||
# Repeat `epochs` times
|
||||
for (epoch in 1:epochs) {
|
||||
|
@ -55,7 +83,7 @@ cve_sgd <- function(X, Y, k,
|
|||
|
||||
# Compute batch gradient.
|
||||
loss <- NULL
|
||||
G <- grad(X[batch, ], Y[batch], V, h)
|
||||
G <- grad(X[batch, ], Y[batch], V, h, loss.out = TRUE)
|
||||
|
||||
# Cayley transform matrix.
|
||||
A <- (G %*% t(V)) - (V %*% t(G))
|
||||
|
@ -65,27 +93,34 @@ cve_sgd <- function(X, Y, k,
|
|||
# Parallet transport (on Stiefl manifold) into direction of `G`.
|
||||
V <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)
|
||||
}
|
||||
}
|
||||
# Compute actuall loss after finishing optimization.
|
||||
loss <- grad(X, Y, V, h, loss.only = TRUE)
|
||||
# After each attempt, check if last attempt reached a better result.
|
||||
if (!is.null(V.best)) { # Only required if there is already a result.
|
||||
if (loss < loss.best) {
|
||||
loss.best <- loss
|
||||
V.best <- V
|
||||
# Compute actuall loss after finishing optimization.
|
||||
loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
|
||||
# And the error for the history.
|
||||
error <- norm(V.last %*% t(V.last) - V %*% t(V), type = "F")
|
||||
V.last <- V
|
||||
|
||||
# Finaly write history.
|
||||
loss.history[epoch, attempt] <- loss
|
||||
error.history[epoch, attempt] <- error
|
||||
|
||||
# Check break condition.
|
||||
if (error < tol) {
|
||||
break()
|
||||
}
|
||||
} else {
|
||||
}
|
||||
# After each attempt, check if last attempt reached a better result.
|
||||
if (loss < loss.best) {
|
||||
loss.best <- loss
|
||||
V.best <- V
|
||||
}
|
||||
}
|
||||
|
||||
return(list(
|
||||
X = X, Y = Y, k = k,
|
||||
nObs = nObs, h = h, tau = tau,
|
||||
epochs = epochs, batch = batch, attempts = attempts,
|
||||
loss.history = loss.history,
|
||||
error.history = error.history,
|
||||
loss = loss.best,
|
||||
V = V.best,
|
||||
B = null(V.best)
|
||||
B = null(V.best),
|
||||
h = h
|
||||
))
|
||||
}
|
||||
|
|
|
@ -17,8 +17,9 @@ cve_simple <- function(X, Y, k,
|
|||
# from within `grad`.
|
||||
environment(grad) <- environment()
|
||||
|
||||
# Setup loss histroy.
|
||||
loss.history <- matrix(NA, epochs, attempts);
|
||||
# Setup histories.
|
||||
loss.history <- matrix(NA, epochs, attempts)
|
||||
error.history <- matrix(NA, epochs, attempts)
|
||||
|
||||
# Get dimensions.
|
||||
n <- nrow(X)
|
||||
|
@ -35,6 +36,20 @@ cve_simple <- function(X, Y, k,
|
|||
h <- estimate.bandwidth(X, k, nObs)
|
||||
}
|
||||
|
||||
# Compute persistent data.
|
||||
# Compute lookup indexes for symmetrie, lower/upper
|
||||
# triangular parts and vectorization.
|
||||
pair.index <- elem.pairs(seq(n))
|
||||
i <- pair.index[, 1] # `i` indices of `(i, j)` pairs
|
||||
j <- pair.index[, 2] # `j` indices of `(i, j)` pairs
|
||||
# Matrix of vectorized indices. (vec(index) -> seq)
|
||||
index <- matrix(seq(n * n), n, n)
|
||||
lower <- index[lower.tri(index)]
|
||||
upper <- t(index)[lower]
|
||||
|
||||
# Create all pairewise differences of rows of `X`.
|
||||
X_diff <- X[i, , drop = F] - X[j, , drop = F]
|
||||
# Identity matrix.
|
||||
I_p <- diag(1, p)
|
||||
|
||||
# Init tracking of current best (according multiple attempts).
|
||||
|
@ -53,7 +68,7 @@ cve_simple <- function(X, Y, k,
|
|||
|
||||
# Initial loss and gradient.
|
||||
loss <- Inf
|
||||
G <- grad(X, Y, V, h, loss.out = TRUE) # `loss.out=T` sets `loss`!
|
||||
G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
|
||||
# Set last loss (aka, loss after applying the step).
|
||||
loss.last <- loss
|
||||
|
||||
|
@ -68,7 +83,7 @@ cve_simple <- function(X, Y, k,
|
|||
V.tau <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)
|
||||
|
||||
# Loss at position after a step.
|
||||
loss <- grad(X, Y, V.tau, h, loss.only = TRUE)
|
||||
loss <- grad(X, Y, V.tau, h, loss.only = TRUE, persistent = TRUE)
|
||||
|
||||
# Check if step is appropriate
|
||||
if ((loss - loss.last) > slack * loss.last) {
|
||||
|
@ -78,6 +93,11 @@ cve_simple <- function(X, Y, k,
|
|||
|
||||
# Compute error.
|
||||
error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
|
||||
|
||||
# Write history.
|
||||
loss.history[epoch, attempt] <- loss
|
||||
error.history[epoch, attempt] <- error
|
||||
|
||||
# Check break condition (epoch check to skip ignored gradient calc).
|
||||
# Note: the devision by `sqrt(2 * k)` is included in `tol`.
|
||||
if (error < tol | epoch >= epochs) {
|
||||
|
@ -91,8 +111,7 @@ cve_simple <- function(X, Y, k,
|
|||
loss.last <- loss
|
||||
|
||||
# Compute gradient at new position.
|
||||
# Note: `loss` will be updated too!
|
||||
G <- grad(X, Y, V, h, loss.out = TRUE, loss.log = TRUE)
|
||||
G <- grad(X, Y, V, h, persistent = TRUE)
|
||||
|
||||
# Cayley transform matrix `A`
|
||||
A <- (G %*% t(V)) - (V %*% t(G))
|
||||
|
@ -108,6 +127,7 @@ cve_simple <- function(X, Y, k,
|
|||
|
||||
return(list(
|
||||
loss.history = loss.history,
|
||||
error.history = error.history,
|
||||
loss = loss.best,
|
||||
V = V.best,
|
||||
B = null(V.best),
|
||||
|
|
|
@ -4,38 +4,47 @@
|
|||
#' @param Y Responce.
|
||||
#' @param V Position to compute the gradient at, aka point on Stiefl manifold.
|
||||
#' @param h Bandwidth
|
||||
#' @param loss.out Iff \code{TRUE} loss will be written to parent environment.
|
||||
#' @param loss.only Boolean to only compute the loss, of \code{TRUE} a single
|
||||
#' value loss is returned and \code{envir} is ignored.
|
||||
#' @param persistent Determines if data indices and dependent calculations shall
|
||||
#' be reused from the parent environment. ATTENTION: Do NOT set this flag, only
|
||||
#' intended for internal usage by carefully aligned functions!
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
grad <- function(X, Y, V, h, loss.out = FALSE, loss.log = FALSE, loss.only = FALSE) {
|
||||
grad <- function(X, Y, V, h,
|
||||
loss.out = FALSE,
|
||||
loss.only = FALSE,
|
||||
persistent = FALSE) {
|
||||
# Get number of samples and dimension.
|
||||
n <- nrow(X)
|
||||
p <- ncol(X)
|
||||
|
||||
# Compute lookup indexes for symmetrie, lower/upper
|
||||
# triangular parts and vectorization.
|
||||
pair.index <- elem.pairs(seq(n))
|
||||
i <- pair.index[, 1] # `i` indices of `(i, j)` pairs
|
||||
j <- pair.index[, 2] # `j` indices of `(i, j)` pairs
|
||||
# Matrix of vectorized indices. (vec(index) -> seq)
|
||||
index <- matrix(seq(n * n), n, n)
|
||||
lower <- index[lower.tri(index)]
|
||||
upper <- t(index)[lower]
|
||||
if (!persistent) {
|
||||
# Compute lookup indexes for symmetrie, lower/upper
|
||||
# triangular parts and vectorization.
|
||||
pair.index <- elem.pairs(seq(n))
|
||||
i <- pair.index[, 1] # `i` indices of `(i, j)` pairs
|
||||
j <- pair.index[, 2] # `j` indices of `(i, j)` pairs
|
||||
# Matrix of vectorized indices. (vec(index) -> seq)
|
||||
index <- matrix(seq(n * n), n, n)
|
||||
lower <- index[lower.tri(index)]
|
||||
upper <- t.default(index)[lower]
|
||||
|
||||
# Create all pairewise differences of rows of `X`.
|
||||
X_diff <- X[i, , drop = F] - X[j, , drop = F]
|
||||
# Create all pairewise differences of rows of `X`.
|
||||
X_diff <- X[i, , drop = F] - X[j, , drop = F]
|
||||
}
|
||||
|
||||
# Projection matrix onto `span(V)`
|
||||
Q <- diag(1, p) - (V %*% t(V))
|
||||
Q <- diag(1, p) - tcrossprod(V, V)
|
||||
|
||||
# Vectorized distance matrix `D`.
|
||||
vecD <- rowSums((X_diff %*% Q)^2)
|
||||
|
||||
# Weight matrix `W` (dnorm ... gaussean density function)
|
||||
W <- matrix(dnorm(0), n, n)
|
||||
W[lower] <- dnorm(vecD / h) # Set lower tri. part
|
||||
W[upper] <- t(W)[upper] # Mirror lower tri. to upper
|
||||
W <- matrix(1, n, n) # `exp(0) == 1`
|
||||
W[lower] <- exp((-0.5 / h) * vecD^2) # Set lower tri. part
|
||||
W[upper] <- t.default(W)[upper] # Mirror lower tri. to upper
|
||||
W <- sweep(W, 2, colSums(W), FUN = `/`) # Col-Normalize
|
||||
|
||||
# Weighted `Y` momentums
|
||||
|
@ -44,19 +53,11 @@ grad <- function(X, Y, V, h, loss.out = FALSE, loss.log = FALSE, loss.only = FAL
|
|||
|
||||
# Per example loss `L(V, X_i)`
|
||||
L <- y2 - y1^2
|
||||
if (loss.out | loss.log | loss.only) {
|
||||
meanL <- mean(L)
|
||||
if (loss.out) {
|
||||
# Bubble environments up and write to loss variable, aka out param.
|
||||
loss <<- meanL
|
||||
}
|
||||
if (loss.log) {
|
||||
loss.history[epoch, attempt] <<- meanL
|
||||
}
|
||||
if (loss.only) {
|
||||
# Mean for total loss `L(V)`.
|
||||
return(meanL)
|
||||
}
|
||||
if (loss.only) {
|
||||
return(mean(L))
|
||||
}
|
||||
if (loss.out) {
|
||||
loss <<- mean(L)
|
||||
}
|
||||
|
||||
# Vectorized Weights with forced symmetry
|
||||
|
@ -66,7 +67,7 @@ grad <- function(X, Y, V, h, loss.out = FALSE, loss.log = FALSE, loss.only = FAL
|
|||
vecS <- vecS * vecD
|
||||
|
||||
# The gradient.
|
||||
G <- t(X_diff) %*% sweep(X_diff %*% V, 1, vecS, `*`)
|
||||
G <- crossprod(X_diff, sweep(X_diff %*% V, 1, vecS, `*`))
|
||||
G <- (-2 / (n * h^2)) * G
|
||||
return(G)
|
||||
}
|
||||
|
|
|
@ -0,0 +1,16 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/cve_linesearch.R
|
||||
\name{cve_linesearch}
|
||||
\alias{cve_linesearch}
|
||||
\title{Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe
|
||||
conditions.}
|
||||
\usage{
|
||||
cve_linesearch(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 1,
|
||||
tol = 0.001, rho1 = 0.1, rho2 = 0.9, slack = 0, epochs = 50L,
|
||||
attempts = 10L, max.linesearch.iter = 10L)
|
||||
}
|
||||
\description{
|
||||
Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe
|
||||
conditions.
|
||||
}
|
||||
\keyword{internal}
|
|
@ -6,7 +6,7 @@
|
|||
a classic GD method unsing no further tricks.}
|
||||
\usage{
|
||||
cve_sgd(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 0.01,
|
||||
epochs = 50L, batch.size = 16L, attempts = 10L)
|
||||
tol = 0.001, epochs = 50L, batch.size = 16L, attempts = 10L)
|
||||
}
|
||||
\description{
|
||||
Simple implementation of the CVE method. 'Simple' means that this method is
|
||||
|
|
|
@ -4,8 +4,8 @@
|
|||
\alias{grad}
|
||||
\title{Compute get gradient of `L(V)` given a dataset `X`.}
|
||||
\usage{
|
||||
grad(X, Y, V, h, loss.out = FALSE, loss.log = FALSE,
|
||||
loss.only = FALSE)
|
||||
grad(X, Y, V, h, loss.out = FALSE, loss.only = FALSE,
|
||||
persistent = FALSE)
|
||||
}
|
||||
\arguments{
|
||||
\item{X}{Data matrix.}
|
||||
|
@ -16,8 +16,14 @@ grad(X, Y, V, h, loss.out = FALSE, loss.log = FALSE,
|
|||
|
||||
\item{h}{Bandwidth}
|
||||
|
||||
\item{loss.out}{Iff \code{TRUE} loss will be written to parent environment.}
|
||||
|
||||
\item{loss.only}{Boolean to only compute the loss, of \code{TRUE} a single
|
||||
value loss is returned and \code{envir} is ignored.}
|
||||
|
||||
\item{persistent}{Determines if data indices and dependent calculations shall
|
||||
be reused from the parent environment. ATTENTION: Do NOT set this flag, only
|
||||
intended for internal usage by carefully aligned functions!}
|
||||
}
|
||||
\description{
|
||||
Compute get gradient of `L(V)` given a dataset `X`.
|
||||
|
|
86
notes.md
86
notes.md
|
@ -111,3 +111,89 @@ do.call(f, list(quote(A)), envir = env) # f.Global A.new
|
|||
do.call("f", list(as.name("A")), envir = env) # f.new A.new
|
||||
do.call("f", list(as.name("A")), envir = env) # f.new A.new
|
||||
```
|
||||
|
||||
# Performance benchmarks
|
||||
In this section alternative implementations of simple algorithms are compared for there performance.
|
||||
|
||||
### Computing the trace of a matrix multiplication.
|
||||
```R
|
||||
library(microbenchmark)
|
||||
|
||||
A <- matrix(runif(120), 12, 10)
|
||||
|
||||
# Matrix trace.
|
||||
tr <- function(M) sum(diag(M))
|
||||
|
||||
# Check correctnes and benckmark performance.
|
||||
stopifnot(
|
||||
all.equal(
|
||||
tr(t(A) %*% A),
|
||||
sum(diag(t(A) %*% A)),
|
||||
sum(A * A)
|
||||
)
|
||||
)
|
||||
microbenchmark(
|
||||
tr(t(A) %*% A),
|
||||
sum(diag(t(A) %*% A)),
|
||||
sum(A * A)
|
||||
)
|
||||
# Unit: nanoseconds
|
||||
# expr min lq mean median uq max neval
|
||||
# tr(t(A) %*% A) 4335 4713 5076.36 4949.5 5402.5 7928 100
|
||||
# sum(diag(t(A) %*% A)) 4106 4429 5233.89 4733.5 5057.5 49308 100
|
||||
# sum(A * A) 540 681 777.07 740.0 818.5 3572 100
|
||||
```
|
||||
|
||||
```R
|
||||
n <- 200
|
||||
M <- matrix(runif(n^2), n, n)
|
||||
|
||||
dnorm2 <- function(x) exp(-0.5 * x^2) / sqrt(2 * pi)
|
||||
|
||||
stopifnot(
|
||||
all.equal(dnorm(M), dnorm2(M))
|
||||
)
|
||||
microbenchmark(
|
||||
dnorm = dnorm(M),
|
||||
dnorm2 = dnorm2(M),
|
||||
exp = exp(-0.5 * M^2) # without scaling -> irrelevant for usage
|
||||
)
|
||||
# Unit: microseconds
|
||||
# expr min lq mean median uq max neval
|
||||
# dnorm 841.503 843.811 920.7828 855.7505 912.4720 2405.587 100
|
||||
# dnorm2 543.510 580.319 629.5321 597.8540 607.3795 2603.763 100
|
||||
# exp 502.083 535.943 577.2884 548.3745 561.3280 2113.220 100
|
||||
```
|
||||
|
||||
### Using `crosspord()`
|
||||
```R
|
||||
p <- 12
|
||||
q <- 10
|
||||
V <- matrix(runif(p * q), p, q)
|
||||
|
||||
stopifnot(
|
||||
all.equal(V %*% t(V), tcrossprod(V)),
|
||||
all.equal(V %*% t(V), tcrossprod(V, V))
|
||||
)
|
||||
microbenchmark(
|
||||
V %*% t(V),
|
||||
tcrossprod(V),
|
||||
tcrossprod(V, V)
|
||||
)
|
||||
# Unit: microseconds
|
||||
# expr min lq mean median uq max neval
|
||||
# V %*% t(V) 2.293 2.6335 2.94673 2.7375 2.9060 19.592 100
|
||||
# tcrossprod(V) 1.148 1.2475 1.86173 1.3440 1.4650 30.688 100
|
||||
# tcrossprod(V, V) 1.003 1.1575 1.28451 1.2400 1.3685 2.742 100
|
||||
```
|
||||
|
||||
## Using `Rprof()` for performance.
|
||||
The standart method for profiling where an algorithm is spending its time is with `Rprof()`.
|
||||
```R
|
||||
path <- '../tmp/R.prof' # path to profiling file
|
||||
Rprof(path)
|
||||
cve.res <- cve.call(X, Y, k = k)
|
||||
Rprof(NULL)
|
||||
(prof <- summaryRprof(path)) # Summarise results
|
||||
```
|
||||
**Note: considure to run `gc()` before measuring**, aka cleaning up by explicitely calling the garbage collector.
|
||||
|
|
|
@ -7,9 +7,11 @@ tell.user <- function(name, start.time, i, length) {
|
|||
i, "/", length,
|
||||
" - elapsed:", format(Sys.time() - start.time), "\033[K")
|
||||
}
|
||||
|
||||
library(CVE) # load CVE
|
||||
source("CVE_legacy/function_script.R") # Source legacy code
|
||||
subspace.dist <- function(B1, B2){
|
||||
P1 <- B1 %*% solve(t(B1) %*% B1) %*% t(B1)
|
||||
P2 <- B2 %*% solve(t(B2) %*% B2) %*% t(B2)
|
||||
return(norm(P1 - P2, type = 'F'))
|
||||
}
|
||||
|
||||
# Number of simulations
|
||||
SIM.NR <- 50
|
||||
|
@ -20,8 +22,13 @@ ATTEMPTS <- 10
|
|||
# set names of datasets
|
||||
dataset.names <- c("M1", "M2", "M3", "M4", "M5")
|
||||
# Set used CVE method
|
||||
# methods <- c("legacy", "simple", "sgd")
|
||||
methods <- c("legacy", "simple", "sgd")
|
||||
methods <- c("simple") #, "sgd") # "legacy"
|
||||
|
||||
library(CVE) # load CVE
|
||||
if ("legacy" %in% methods) {
|
||||
# Source legacy code (but only if needed)
|
||||
source("CVE_legacy/function_script.R")
|
||||
}
|
||||
|
||||
# Setup error and time tracking variables
|
||||
error <- matrix(NA, SIM.NR, length(methods) * length(dataset.names))
|
||||
|
@ -79,7 +86,7 @@ for (sim in 1:SIM.NR) {
|
|||
}
|
||||
|
||||
key <- paste0(name, '-', method)
|
||||
error[sim, key] <- subspace_dist(dr$B, ds$B) / sqrt(2 * truedim)
|
||||
error[sim, key] <- subspace.dist(dr$B, ds$B) / sqrt(2 * truedim)
|
||||
time[sim, key] <- dr.time["elapsed"]
|
||||
|
||||
# Log results to file (mostly for long running simulations)
|
||||
|
|
Loading…
Reference in New Issue