wip: writing C gradient version
This commit is contained in:
parent
47917fe0bd
commit
4d2651fe8a
|
@ -1,88 +1,71 @@
|
|||
# -----------------------------------------------------------------------------
|
||||
# Program: runtime_test.R
|
||||
# Author: Loki
|
||||
# Date: 2019.08.12
|
||||
#
|
||||
# Purpose:
|
||||
# Comparing runtime of "MAVE" with "CVE".
|
||||
#
|
||||
# RevisionHistory:
|
||||
# Loki -- 2019.08.12 initial creation
|
||||
# -----------------------------------------------------------------------------
|
||||
# Usage:
|
||||
# ~$ Rscript runtime_test.R
|
||||
|
||||
# load CVE package
|
||||
library(CVE)
|
||||
# load MAVE package for comparison
|
||||
library(MAVE)
|
||||
library(CVE) # load CVE
|
||||
|
||||
# set nr of simulations per dataset
|
||||
nr.sim <- 10
|
||||
|
||||
# set names of datasets to run tests on
|
||||
dataset.names <- c("M1", "M2", "M3", "M4", "M5")
|
||||
|
||||
#' Orthogonal projection to sub-space spanned by `B`
|
||||
#'
|
||||
#' @param B Matrix
|
||||
#' @return Orthogonal Projection Matrix
|
||||
proj <- function(B) {
|
||||
B %*% solve(t(B) %*% B) %*% t(B)
|
||||
#' Writes progress to console.
|
||||
tell.user <- function(name, start.time, i, length) {
|
||||
cat("\rRunning Test (", name, "):",
|
||||
i, "/", length,
|
||||
" - elapsed:", format(Sys.time() - start.time), "\033[K")
|
||||
}
|
||||
subspace.dist <- function(B1, B2){
|
||||
P1 <- B1 %*% solve(t(B1) %*% B1) %*% t(B1)
|
||||
P2 <- B2 %*% solve(t(B2) %*% B2) %*% t(B2)
|
||||
return(norm(P1 - P2, type = 'F'))
|
||||
}
|
||||
|
||||
#' Compute nObs given dataset dimension \code{n}.
|
||||
#'
|
||||
#' @param n Number of samples
|
||||
#' @return Numeric estimate of \code{nObs}
|
||||
nObs <- function (n) { n^0.5 }
|
||||
# Number of simulations
|
||||
SIM.NR <- 50
|
||||
# maximal number of iterations in curvilinear search algorithm
|
||||
MAXIT <- 50
|
||||
# number of arbitrary starting values for curvilinear optimization
|
||||
ATTEMPTS <- 10
|
||||
# set names of datasets
|
||||
dataset.names <- c("M1", "M2", "M3", "M4", "M5")
|
||||
# Set used CVE method
|
||||
methods <- c("simple") # c("legacy", "simple", "sgd", "linesearch")
|
||||
|
||||
## prepare "logging"
|
||||
# result error, time, ... data.frame's
|
||||
error <- matrix(nrow = nr.sim, ncol = 2 * length(dataset.names))
|
||||
time <- matrix(nrow = nr.sim, ncol = 2 * length(dataset.names))
|
||||
# convert to data.frames
|
||||
error <- as.data.frame(error)
|
||||
time <- as.data.frame(time)
|
||||
# set names
|
||||
names(error) <- kronecker(c("CVE.", "MAVE."), dataset.names, paste0)
|
||||
names(time) <- kronecker(c("CVE.", "MAVE."), dataset.names, paste0)
|
||||
# Setup error and time tracking variables
|
||||
error <- matrix(NA, SIM.NR, length(methods) * length(dataset.names))
|
||||
time <- matrix(NA, SIM.NR, ncol(error))
|
||||
colnames(error) <- kronecker(paste0(dataset.names, '-'), methods, paste0)
|
||||
colnames(time) <- colnames(error)
|
||||
|
||||
# get current time
|
||||
# only for telling user (to stdout)
|
||||
count <- 0
|
||||
start.time <- Sys.time()
|
||||
## main comparison loop (iterate `nr.sim` times for each dataset)
|
||||
for (i in seq_along(dataset.names)) {
|
||||
for (j in 1:nr.sim) {
|
||||
name <- dataset.names[i]
|
||||
# reporting progress
|
||||
cat("\rRunning Test (", name, j , "):",
|
||||
(i - 1) * nr.sim + j, "/", length(dataset.names) * nr.sim,
|
||||
" - Time since start:", format(Sys.time() - start.time), "\033[K")
|
||||
# sample new dataset
|
||||
# Start simulation loop.
|
||||
for (sim in 1:SIM.NR) {
|
||||
# Repeat for each dataset.
|
||||
for (name in dataset.names) {
|
||||
count <- count + 1
|
||||
tell.user(name, start.time, count, SIM.NR * length(dataset.names))
|
||||
|
||||
# Create a new dataset
|
||||
ds <- dataset(name)
|
||||
k <- ncol(ds$B) # real dim
|
||||
data <- data.frame(X = ds$X, Y = ds$Y)
|
||||
# call CVE
|
||||
cve.time <- system.time(
|
||||
cve.res <- cve(Y ~ .,
|
||||
data = data,
|
||||
method = "simple",
|
||||
k = k)
|
||||
)
|
||||
# call MAVE
|
||||
mave.time <- system.time(
|
||||
mave.res <- mave(Y ~ .,
|
||||
data = data,
|
||||
method = "meanMAVE")
|
||||
)
|
||||
# compute real and approximated sub-space projections
|
||||
P <- proj(ds$B) # real
|
||||
P.cve <- proj(cve.res$B)
|
||||
P.mave <- proj(mave.res$dir[[k]])
|
||||
# compute (and store) errors
|
||||
error[j, paste0("CVE.", name)] <- norm(P - P.cve, 'F') / sqrt(2 * k)
|
||||
error[j, paste0("MAVE.", name)] <- norm(P - P.mave, 'F') / sqrt(2 * k)
|
||||
# store run-times
|
||||
time[j, paste0("CVE.", name)] <- cve.time["elapsed"]
|
||||
time[j, paste0("MAVE.", name)] <- mave.time["elapsed"]
|
||||
# Prepare X, Y and combine to data matrix
|
||||
Y <- ds$Y
|
||||
X <- ds$X
|
||||
data <- cbind(Y, X)
|
||||
# get dimensions
|
||||
dim <- ncol(X)
|
||||
truedim <- ncol(ds$B)
|
||||
|
||||
for (method in methods) {
|
||||
dr.time <- system.time(
|
||||
dr <- cve.call(X, Y,
|
||||
method = method,
|
||||
k = truedim,
|
||||
attempts = ATTEMPTS
|
||||
)
|
||||
)
|
||||
dr <- dr[[truedim]]
|
||||
|
||||
key <- paste0(name, '-', method)
|
||||
error[sim, key] <- subspace.dist(dr$B, ds$B) / sqrt(2 * truedim)
|
||||
time[sim, key] <- dr.time["elapsed"]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -91,16 +74,16 @@ print(colMeans(time))
|
|||
cat("\n## Error Means:\n")
|
||||
print(colMeans(error))
|
||||
|
||||
len <- length(dataset.names)
|
||||
boxplot(as.matrix(error),
|
||||
main = paste0("Error (nr.sim = ", nr.sim, ")"),
|
||||
ylab = expression(error == group("||", P[B] - P[hat(B)], "||")[F] / sqrt(2*k)),
|
||||
at <- seq(ncol(error)) + rep(seq(ncol(error) / length(methods)) - 1, each = length(methods))
|
||||
boxplot(error,
|
||||
main = paste0("Error (Nr of simulations ", SIM.NR, ")"),
|
||||
ylab = "Error",
|
||||
las = 2,
|
||||
at = c(1:len, 1:len + len + 1)
|
||||
at = at
|
||||
)
|
||||
boxplot(as.matrix(time),
|
||||
main = paste0("Time (nr.sim = ", nr.sim, ")"),
|
||||
ylab = "time [sec]",
|
||||
boxplot(time,
|
||||
main = paste0("Time (Nr of simulations ", SIM.NR, ")"),
|
||||
ylab = "Time [sec]",
|
||||
las = 2,
|
||||
at = c(1:len, 1:len + len + 1)
|
||||
at = at
|
||||
)
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
Package: CVE
|
||||
Package: CVEpureR
|
||||
Type: Package
|
||||
Title: Conditional Variance Estimator for Sufficient Dimension Reduction
|
||||
Version: 0.1
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
|
||||
S3method(plot,cve)
|
||||
S3method(summary,cve)
|
||||
export(col.pair.apply)
|
||||
export(cve)
|
||||
export(cve.call)
|
||||
export(cve.grid.search)
|
||||
|
@ -15,7 +14,6 @@ export(estimate.bandwidth)
|
|||
export(grad)
|
||||
export(null)
|
||||
export(rStiefl)
|
||||
export(row.pair.apply)
|
||||
import(stats)
|
||||
importFrom(graphics,lines)
|
||||
importFrom(graphics,plot)
|
||||
|
|
124
CVE_R/R/CVE.R
124
CVE_R/R/CVE.R
|
@ -1,3 +1,17 @@
|
|||
#' Conditional Variance Estimator (CVE)
|
||||
#'
|
||||
#' Conditional Variance Estimator for Sufficient Dimension
|
||||
#' Reduction
|
||||
#'
|
||||
#' TODO: And some details
|
||||
#'
|
||||
#'
|
||||
#' @references Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
|
||||
#'
|
||||
#' @docType package
|
||||
#' @author Loki
|
||||
"_PACKAGE"
|
||||
|
||||
#' Implementation of the CVE method.
|
||||
#'
|
||||
#' Conditional Variance Estimator (CVE) is a novel sufficient dimension
|
||||
|
@ -40,7 +54,7 @@
|
|||
#' @import stats
|
||||
#' @importFrom stats model.frame
|
||||
#' @export
|
||||
cve <- function(formula, data, method = "simple", ...) {
|
||||
cve <- function(formula, data, method = "simple", max.dim = 10, ...) {
|
||||
# check for type of `data` if supplied and set default
|
||||
if (missing(data)) {
|
||||
data <- environment(formula)
|
||||
|
@ -69,12 +83,8 @@ cve <- function(formula, data, method = "simple", ...) {
|
|||
#' @param ... Method specific parameters.
|
||||
#' @rdname cve
|
||||
#' @export
|
||||
cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5, k, ...) {
|
||||
|
||||
# TODO: replace default value of `k` by `max.dim` when fast enough
|
||||
if (missing(k)) {
|
||||
stop("TODO: parameter `k` (rank(B)) is required, replace by `max.dim`.")
|
||||
}
|
||||
cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5,
|
||||
min.dim = 1, max.dim = 10, k, ...) {
|
||||
|
||||
# parameter checking
|
||||
if (!is.matrix(X)) {
|
||||
|
@ -90,21 +100,42 @@ cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5, k, ...) {
|
|||
stop('X is one dimensional, no need for dimension reduction.')
|
||||
}
|
||||
|
||||
if (!missing(k)) {
|
||||
min.dim <- as.integer(k)
|
||||
max.dim <- as.integer(k)
|
||||
} else {
|
||||
min.dim <- as.integer(min.dim)
|
||||
max.dim <- as.integer(min(max.dim, ncol(X) - 1L))
|
||||
}
|
||||
if (min.dim > max.dim) {
|
||||
stop('`min.dim` bigger `max.dim`.')
|
||||
}
|
||||
if (max.dim >= ncol(X)) {
|
||||
stop('`max.dim` must be smaller than `ncol(X)`.')
|
||||
}
|
||||
|
||||
# Call specified method.
|
||||
method <- tolower(method)
|
||||
if (method == 'simple') {
|
||||
dr <- cve_simple(X, Y, k, nObs = nObs, ...)
|
||||
} else if (method == 'linesearch') {
|
||||
dr <- cve_linesearch(X, Y, k, nObs = nObs, ...)
|
||||
} else if (method == 'sgd') {
|
||||
dr <- cve_sgd(X, Y, k, nObs = nObs, ...)
|
||||
} else {
|
||||
stop('Got unknown method.')
|
||||
call <- match.call()
|
||||
dr <- list()
|
||||
for (k in min.dim:max.dim) {
|
||||
if (method == 'simple') {
|
||||
dr.k <- cve_simple(X, Y, k, nObs = nObs, ...)
|
||||
} else if (method == 'linesearch') {
|
||||
dr.k <- cve_linesearch(X, Y, k, nObs = nObs, ...)
|
||||
} else if (method == 'sgd') {
|
||||
dr.k <- cve_sgd(X, Y, k, nObs = nObs, ...)
|
||||
} else {
|
||||
stop('Got unknown method.')
|
||||
}
|
||||
dr.k$k <- k
|
||||
class(dr.k) <- "cve.k"
|
||||
dr[[k]] <- dr.k
|
||||
}
|
||||
|
||||
# augment result information
|
||||
dr$method <- method
|
||||
dr$call <- match.call()
|
||||
dr.k$method <- method
|
||||
dr.k$call <- call
|
||||
class(dr) <- "cve"
|
||||
return(dr)
|
||||
}
|
||||
|
@ -134,41 +165,42 @@ cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5, k, ...) {
|
|||
#' @export
|
||||
plot.cve <- function(x, ...) {
|
||||
|
||||
H <- x$history
|
||||
H_1 <- H[!is.na(H[, 1]), 1]
|
||||
|
||||
defaults <- list(
|
||||
main = "History",
|
||||
xlab = "Iterations i",
|
||||
ylab = expression(loss == L[n](V^{(i)})),
|
||||
xlim = c(1, nrow(H)),
|
||||
ylim = c(0, max(H)),
|
||||
type = "l"
|
||||
)
|
||||
# H <- x$history
|
||||
# H_1 <- H[!is.na(H[, 1]), 1]
|
||||
|
||||
call.plot <- match.call()
|
||||
keys <- names(defaults)
|
||||
keys <- keys[match(keys, names(call.plot)[-1], nomatch = 0) == 0]
|
||||
# defaults <- list(
|
||||
# main = "History",
|
||||
# xlab = "Iterations i",
|
||||
# ylab = expression(loss == L[n](V^{(i)})),
|
||||
# xlim = c(1, nrow(H)),
|
||||
# ylim = c(0, max(H)),
|
||||
# type = "l"
|
||||
# )
|
||||
|
||||
for (key in keys) {
|
||||
call.plot[[key]] <- defaults[[key]]
|
||||
}
|
||||
# call.plot <- match.call()
|
||||
# keys <- names(defaults)
|
||||
# keys <- keys[match(keys, names(call.plot)[-1], nomatch = 0) == 0]
|
||||
|
||||
call.plot[[1L]] <- quote(plot)
|
||||
call.plot$x <- quote(1:length(H_1))
|
||||
call.plot$y <- quote(H_1)
|
||||
# for (key in keys) {
|
||||
# call.plot[[key]] <- defaults[[key]]
|
||||
# }
|
||||
|
||||
eval(call.plot)
|
||||
# call.plot[[1L]] <- quote(plot)
|
||||
# call.plot$x <- quote(1:length(H_1))
|
||||
# call.plot$y <- quote(H_1)
|
||||
|
||||
if (ncol(H) > 1) {
|
||||
for (i in 2:ncol(H)) {
|
||||
H_i <- H[H[, i] > 0, i]
|
||||
lines(1:length(H_i), H_i)
|
||||
}
|
||||
}
|
||||
x.ends <- apply(H, 2, function(h) { length(h[!is.na(h)]) })
|
||||
y.ends <- apply(H, 2, function(h) { tail(h[!is.na(h)], n=1) })
|
||||
points(x.ends, y.ends)
|
||||
# eval(call.plot)
|
||||
|
||||
# if (ncol(H) > 1) {
|
||||
# for (i in 2:ncol(H)) {
|
||||
# H_i <- H[H[, i] > 0, i]
|
||||
# lines(1:length(H_i), H_i)
|
||||
# }
|
||||
# }
|
||||
# x.ends <- apply(H, 2, function(h) { length(h[!is.na(h)]) })
|
||||
# y.ends <- apply(H, 2, function(h) { tail(h[!is.na(h)], n=1) })
|
||||
# points(x.ends, y.ends)
|
||||
}
|
||||
|
||||
#' Prints a summary of a \code{cve} result.
|
||||
|
|
|
@ -13,18 +13,14 @@ cve_linesearch <- function(X, Y, k,
|
|||
slack = 0,
|
||||
epochs = 50L,
|
||||
attempts = 10L,
|
||||
max.linesearch.iter = 10L
|
||||
max.linesearch.iter = 10L,
|
||||
logger = NULL
|
||||
) {
|
||||
# Set `grad` functions environment to enable if to find this environments
|
||||
# local variabels, needed to enable the manipulation of this local variables
|
||||
# from within `grad`.
|
||||
environment(grad) <- environment()
|
||||
|
||||
# Setup histories.
|
||||
loss.history <- matrix(NA, epochs, attempts)
|
||||
error.history <- matrix(NA, epochs, attempts)
|
||||
tau.history <- matrix(NA, epochs, attempts)
|
||||
|
||||
# Get dimensions.
|
||||
n <- nrow(X)
|
||||
p <- ncol(X)
|
||||
|
@ -44,8 +40,8 @@ cve_linesearch <- function(X, Y, k,
|
|||
# Compute lookup indexes for symmetrie, lower/upper
|
||||
# triangular parts and vectorization.
|
||||
pair.index <- elem.pairs(seq(n))
|
||||
i <- pair.index[, 1] # `i` indices of `(i, j)` pairs
|
||||
j <- pair.index[, 2] # `j` indices of `(i, j)` pairs
|
||||
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
|
||||
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
|
||||
# Matrix of vectorized indices. (vec(index) -> seq)
|
||||
index <- matrix(seq(n * n), n, n)
|
||||
lower <- index[lower.tri(index)]
|
||||
|
@ -73,6 +69,13 @@ cve_linesearch <- function(X, Y, k,
|
|||
# Set last loss (aka, loss after applying the step).
|
||||
loss.last <- loss
|
||||
|
||||
# Call logger with initial values before starting optimization.
|
||||
if (is.function(logger)) {
|
||||
epoch <- 0 # Set epoch count to 0 (only relevant for logging).
|
||||
error <- NA
|
||||
logger(environment())
|
||||
}
|
||||
|
||||
## Start optimization loop.
|
||||
for (epoch in 1:epochs) {
|
||||
|
||||
|
@ -124,16 +127,16 @@ cve_linesearch <- function(X, Y, k,
|
|||
# Compute error.
|
||||
error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
|
||||
|
||||
# Write history.
|
||||
loss.history[epoch, attempt] <- loss
|
||||
error.history[epoch, attempt] <- error
|
||||
tau.history[epoch, attempt] <- tau
|
||||
|
||||
# Check break condition (epoch check to skip ignored gradient calc).
|
||||
# Note: the devision by `sqrt(2 * k)` is included in `tol`.
|
||||
if (error < tol | epoch >= epochs) {
|
||||
# take last step and stop optimization.
|
||||
V <- V.tau
|
||||
# Final call to the logger before stopping optimization
|
||||
if (is.function(logger)) {
|
||||
G <- G.tau
|
||||
logger(environment())
|
||||
}
|
||||
break()
|
||||
}
|
||||
|
||||
|
@ -141,6 +144,12 @@ cve_linesearch <- function(X, Y, k,
|
|||
V <- V.tau
|
||||
loss.last <- loss
|
||||
G <- G.tau
|
||||
|
||||
# Log after taking current step.
|
||||
if (is.function(logger)) {
|
||||
logger(environment())
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
# Check if current attempt improved previous ones
|
||||
|
@ -152,9 +161,6 @@ cve_linesearch <- function(X, Y, k,
|
|||
}
|
||||
|
||||
return(list(
|
||||
loss.history = loss.history,
|
||||
error.history = error.history,
|
||||
tau.history = tau.history,
|
||||
loss = loss.best,
|
||||
V = V.best,
|
||||
B = null(V.best),
|
||||
|
|
|
@ -10,17 +10,14 @@ cve_sgd <- function(X, Y, k,
|
|||
tol = 1e-3,
|
||||
epochs = 50L,
|
||||
batch.size = 16L,
|
||||
attempts = 10L
|
||||
attempts = 10L,
|
||||
logger = NULL
|
||||
) {
|
||||
# Set `grad` functions environment to enable if to find this environments
|
||||
# local variabels, needed to enable the manipulation of this local variables
|
||||
# from within `grad`.
|
||||
environment(grad) <- environment()
|
||||
|
||||
# Setup histories.
|
||||
loss.history <- matrix(NA, epochs, attempts)
|
||||
error.history <- matrix(NA, epochs, attempts)
|
||||
|
||||
# Get dimensions.
|
||||
n <- nrow(X) # Number of samples.
|
||||
p <- ncol(X) # Data dimensions
|
||||
|
@ -32,7 +29,7 @@ cve_sgd <- function(X, Y, k,
|
|||
tol <- sqrt(2 * q) * tol
|
||||
|
||||
# Estaimate bandwidth if not given.
|
||||
if (missing(h) | !is.numeric(h)) {
|
||||
if (missing(h) || !is.numeric(h)) {
|
||||
h <- estimate.bandwidth(X, k, nObs)
|
||||
}
|
||||
|
||||
|
@ -40,12 +37,11 @@ cve_sgd <- function(X, Y, k,
|
|||
# Compute lookup indexes for symmetrie, lower/upper
|
||||
# triangular parts and vectorization.
|
||||
pair.index <- elem.pairs(seq(n))
|
||||
i <- pair.index[, 1] # `i` indices of `(i, j)` pairs
|
||||
j <- pair.index[, 2] # `j` indices of `(i, j)` pairs
|
||||
# Matrix of vectorized indices. (vec(index) -> seq)
|
||||
index <- matrix(seq(n * n), n, n)
|
||||
lower <- index[lower.tri(index)]
|
||||
upper <- t(index)[lower]
|
||||
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
|
||||
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
|
||||
# Index of vectorized matrix, for lower and upper triangular part.
|
||||
lower <- ((i - 1) * n) + j
|
||||
upper <- ((j - 1) * n) + i
|
||||
|
||||
# Create all pairewise differences of rows of `X`.
|
||||
X_diff <- X[i, , drop = F] - X[j, , drop = F]
|
||||
|
@ -69,17 +65,23 @@ cve_sgd <- function(X, Y, k,
|
|||
# Keep track of last `V` for computing error after an epoch.
|
||||
V.last <- V
|
||||
|
||||
if (is.function(logger)) {
|
||||
loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
|
||||
error <- NA
|
||||
epoch <- 0
|
||||
logger(environment())
|
||||
}
|
||||
|
||||
# Repeat `epochs` times
|
||||
for (epoch in 1:epochs) {
|
||||
# Shuffle batches
|
||||
batch.shuffle <- sample(indices)
|
||||
|
||||
# Make a step for each batch.
|
||||
for (start in seq(1, n, batch.size)) {
|
||||
for (batch.start in seq(1, n, batch.size)) {
|
||||
# Select batch data indices.
|
||||
batch <- batch.shuffle[start:(start + batch.size - 1)]
|
||||
# Remove `NA`'s (when `n` isn't a multiple of `batch.size`).
|
||||
batch <- batch[!is.na(batch)]
|
||||
batch.end <- min(batch.start + batch.size - 1, length(batch.shuffle))
|
||||
batch <- batch.shuffle[batch.start:batch.end]
|
||||
|
||||
# Compute batch gradient.
|
||||
loss <- NULL
|
||||
|
@ -93,21 +95,24 @@ cve_sgd <- function(X, Y, k,
|
|||
# Parallet transport (on Stiefl manifold) into direction of `G`.
|
||||
V <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)
|
||||
}
|
||||
# Compute actuall loss after finishing optimization.
|
||||
loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
|
||||
# And the error for the history.
|
||||
error <- norm(V.last %*% t(V.last) - V %*% t(V), type = "F")
|
||||
V.last <- V
|
||||
|
||||
# Finaly write history.
|
||||
loss.history[epoch, attempt] <- loss
|
||||
error.history[epoch, attempt] <- error
|
||||
if (is.function(logger)) {
|
||||
# Compute loss at end of epoch for logging.
|
||||
loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
|
||||
logger(environment())
|
||||
}
|
||||
|
||||
# Check break condition.
|
||||
if (error < tol) {
|
||||
break()
|
||||
}
|
||||
}
|
||||
# Compute actual loss after finishing for comparing multiple attempts.
|
||||
loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
|
||||
|
||||
# After each attempt, check if last attempt reached a better result.
|
||||
if (loss < loss.best) {
|
||||
loss.best <- loss
|
||||
|
@ -116,8 +121,6 @@ cve_sgd <- function(X, Y, k,
|
|||
}
|
||||
|
||||
return(list(
|
||||
loss.history = loss.history,
|
||||
error.history = error.history,
|
||||
loss = loss.best,
|
||||
V = V.best,
|
||||
B = null(V.best),
|
||||
|
|
|
@ -10,21 +10,18 @@ cve_simple <- function(X, Y, k,
|
|||
tol = 1e-3,
|
||||
slack = 0,
|
||||
epochs = 50L,
|
||||
attempts = 10L
|
||||
attempts = 10L,
|
||||
logger = NULL
|
||||
) {
|
||||
# Set `grad` functions environment to enable if to find this environments
|
||||
# local variabels, needed to enable the manipulation of this local variables
|
||||
# from within `grad`.
|
||||
environment(grad) <- environment()
|
||||
|
||||
# Setup histories.
|
||||
loss.history <- matrix(NA, epochs, attempts)
|
||||
error.history <- matrix(NA, epochs, attempts)
|
||||
|
||||
# Get dimensions.
|
||||
n <- nrow(X)
|
||||
p <- ncol(X)
|
||||
q <- p - k
|
||||
n <- nrow(X) # Number of samples.
|
||||
p <- ncol(X) # Data dimensions
|
||||
q <- p - k # Complement dimension of the SDR space.
|
||||
|
||||
# Save initial learning rate `tau`.
|
||||
tau.init <- tau
|
||||
|
@ -32,7 +29,7 @@ cve_simple <- function(X, Y, k,
|
|||
tol <- sqrt(2 * q) * tol
|
||||
|
||||
# Estaimate bandwidth if not given.
|
||||
if (missing(h) | !is.numeric(h)) {
|
||||
if (missing(h) || !is.numeric(h)) {
|
||||
h <- estimate.bandwidth(X, k, nObs)
|
||||
}
|
||||
|
||||
|
@ -40,12 +37,11 @@ cve_simple <- function(X, Y, k,
|
|||
# Compute lookup indexes for symmetrie, lower/upper
|
||||
# triangular parts and vectorization.
|
||||
pair.index <- elem.pairs(seq(n))
|
||||
i <- pair.index[, 1] # `i` indices of `(i, j)` pairs
|
||||
j <- pair.index[, 2] # `j` indices of `(i, j)` pairs
|
||||
# Matrix of vectorized indices. (vec(index) -> seq)
|
||||
index <- matrix(seq(n * n), n, n)
|
||||
lower <- index[lower.tri(index)]
|
||||
upper <- t(index)[lower]
|
||||
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
|
||||
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
|
||||
# Index of vectorized matrix, for lower and upper triangular part.
|
||||
lower <- ((i - 1) * n) + j
|
||||
upper <- ((j - 1) * n) + i
|
||||
|
||||
# Create all pairewise differences of rows of `X`.
|
||||
X_diff <- X[i, , drop = F] - X[j, , drop = F]
|
||||
|
@ -58,8 +54,7 @@ cve_simple <- function(X, Y, k,
|
|||
|
||||
# Start loop for multiple attempts.
|
||||
for (attempt in 1:attempts) {
|
||||
|
||||
# reset step width `tau`
|
||||
# Reset learning rate `tau`.
|
||||
tau <- tau.init
|
||||
|
||||
# Sample a `(p, q)` dimensional matrix from the stiefel manifold as
|
||||
|
@ -75,6 +70,13 @@ cve_simple <- function(X, Y, k,
|
|||
# Cayley transform matrix `A`
|
||||
A <- (G %*% t(V)) - (V %*% t(G))
|
||||
|
||||
# Call logger with initial values before starting optimization.
|
||||
if (is.function(logger)) {
|
||||
epoch <- 0 # Set epoch count to 0 (only relevant for logging).
|
||||
error <- NA
|
||||
logger(environment())
|
||||
}
|
||||
|
||||
## Start optimization loop.
|
||||
for (epoch in 1:epochs) {
|
||||
# Apply learning rate `tau`.
|
||||
|
@ -85,7 +87,7 @@ cve_simple <- function(X, Y, k,
|
|||
# Loss at position after a step.
|
||||
loss <- grad(X, Y, V.tau, h, loss.only = TRUE, persistent = TRUE)
|
||||
|
||||
# Check if step is appropriate
|
||||
# Check if step is appropriate, iff not reduce learning rate.
|
||||
if ((loss - loss.last) > slack * loss.last) {
|
||||
tau <- tau / 2
|
||||
next() # Keep position and try with smaller `tau`.
|
||||
|
@ -94,15 +96,15 @@ cve_simple <- function(X, Y, k,
|
|||
# Compute error.
|
||||
error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
|
||||
|
||||
# Write history.
|
||||
loss.history[epoch, attempt] <- loss
|
||||
error.history[epoch, attempt] <- error
|
||||
|
||||
# Check break condition (epoch check to skip ignored gradient calc).
|
||||
# Note: the devision by `sqrt(2 * k)` is included in `tol`.
|
||||
if (error < tol | epoch >= epochs) {
|
||||
if (error < tol || epoch >= epochs) {
|
||||
# take last step and stop optimization.
|
||||
V <- V.tau
|
||||
# Call logger last time befor stoping.
|
||||
if (is.function(logger)) {
|
||||
logger(environment())
|
||||
}
|
||||
break()
|
||||
}
|
||||
|
||||
|
@ -110,6 +112,11 @@ cve_simple <- function(X, Y, k,
|
|||
V <- V.tau
|
||||
loss.last <- loss
|
||||
|
||||
# Call logger after taking a step.
|
||||
if (is.function(logger)) {
|
||||
logger(environment())
|
||||
}
|
||||
|
||||
# Compute gradient at new position.
|
||||
G <- grad(X, Y, V, h, persistent = TRUE)
|
||||
|
||||
|
@ -126,8 +133,6 @@ cve_simple <- function(X, Y, k,
|
|||
}
|
||||
|
||||
return(list(
|
||||
loss.history = loss.history,
|
||||
error.history = error.history,
|
||||
loss = loss.best,
|
||||
V = V.best,
|
||||
B = null(V.best),
|
||||
|
|
|
@ -24,12 +24,11 @@ grad <- function(X, Y, V, h,
|
|||
# Compute lookup indexes for symmetrie, lower/upper
|
||||
# triangular parts and vectorization.
|
||||
pair.index <- elem.pairs(seq(n))
|
||||
i <- pair.index[, 1] # `i` indices of `(i, j)` pairs
|
||||
j <- pair.index[, 2] # `j` indices of `(i, j)` pairs
|
||||
# Matrix of vectorized indices. (vec(index) -> seq)
|
||||
index <- matrix(seq(n * n), n, n)
|
||||
lower <- index[lower.tri(index)]
|
||||
upper <- t.default(index)[lower]
|
||||
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
|
||||
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
|
||||
# Index of vectorized matrix, for lower and upper triangular part.
|
||||
lower <- ((i - 1) * n) + j
|
||||
upper <- ((j - 1) * n) + i
|
||||
|
||||
# Create all pairewise differences of rows of `X`.
|
||||
X_diff <- X[i, , drop = F] - X[j, , drop = F]
|
||||
|
@ -39,7 +38,7 @@ grad <- function(X, Y, V, h,
|
|||
Q <- diag(1, p) - tcrossprod(V, V)
|
||||
|
||||
# Vectorized distance matrix `D`.
|
||||
vecD <- rowSums((X_diff %*% Q)^2)
|
||||
vecD <- colSums(tcrossprod(Q, X_diff)^2)
|
||||
|
||||
# Weight matrix `W` (dnorm ... gaussean density function)
|
||||
W <- matrix(1, n, n) # `exp(0) == 1`
|
||||
|
|
|
@ -22,42 +22,19 @@ null <- function(V) {
|
|||
return(qr.Q(tmp, complete=TRUE)[, set, drop=FALSE])
|
||||
}
|
||||
|
||||
#' Creates a (numeric) matrix where each row contains
|
||||
#' Creates a (numeric) matrix where each column contains
|
||||
#' an element to element matching.
|
||||
#' @param elements numeric vector of elements to match
|
||||
#' @return matrix of size `(n * (n - 1) / 2, 2)` for a argument of lenght `n`.
|
||||
#' @return matrix of size `(2, n * (n - 1) / 2)` for a argument of lenght `n`.
|
||||
#' @keywords internal
|
||||
#' @examples
|
||||
#' elem.pairs(seq.int(2, 5))
|
||||
#' @export
|
||||
elem.pairs <- function(elements) {
|
||||
# Number of elements to match.
|
||||
n <- length(elements)
|
||||
# Create all combinations.
|
||||
pairs <- cbind(rep(elements, each=n), rep(elements, n))
|
||||
pairs <- rbind(rep(elements, each=n), rep(elements, n))
|
||||
# Select unique combinations without self interaction.
|
||||
return(pairs[pairs[, 1] < pairs[, 2], ])
|
||||
}
|
||||
|
||||
#' Apply function to pairs of matrix rows or columns.
|
||||
#'
|
||||
#' \code{row.pair.apply} applies \code{FUN} to each unique row pair without self
|
||||
#' interaction while \code{col.pair.apply} does the same for columns.
|
||||
#' @param X Matrix.
|
||||
#' @param FUN Function to apply to each pair.
|
||||
#' @examples
|
||||
#' X <- matrix(seq(12), 4, 3)
|
||||
#' # matrix containing all row to row differences.
|
||||
#' row.pair.apply(X, `-`)
|
||||
#' @aliases row.pair.apply, col.pair.apply
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
row.pair.apply <- function(X, FUN) {
|
||||
pairs <- elem.pairs(seq(nrow(X)))
|
||||
return(FUN(X[pairs[, 1], ], X[pairs[, 2], ]))
|
||||
}
|
||||
#' @rdname row.pair.apply
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
col.pair.apply <- function(X, FUN) {
|
||||
pairs <- elem.pairs(seq(ncol(X)))
|
||||
return(FUN(X[, pairs[, 1]], X[, pairs[, 2]]))
|
||||
return(pairs[, pairs[1, ] < pairs[2, ]])
|
||||
}
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
runtime_test Runtime comparison of CVE against MAVE for M1 - M5 datasets.
|
||||
logging Example of a logger function for cve algorithm analysis.
|
|
@ -0,0 +1,43 @@
|
|||
library(CVEpureR)
|
||||
|
||||
# Setup histories.
|
||||
(epochs <- 50)
|
||||
(attempts <- 10)
|
||||
loss.history <- matrix(NA, epochs + 1, attempts)
|
||||
error.history <- matrix(NA, epochs + 1, attempts)
|
||||
tau.history <- matrix(NA, epochs + 1, attempts)
|
||||
true.error.history <- matrix(NA, epochs + 1, attempts)
|
||||
|
||||
# Create a dataset
|
||||
ds <- dataset("M1")
|
||||
X <- ds$X
|
||||
Y <- ds$Y
|
||||
B <- ds$B # the true `B`
|
||||
(k <- ncol(ds$B))
|
||||
|
||||
# True projection matrix.
|
||||
P <- B %*% solve(t(B) %*% B) %*% t(B)
|
||||
# Define the logger for the `cve()` method.
|
||||
logger <- function(env) {
|
||||
# Note the `<<-` assignement!
|
||||
loss.history[env$epoch + 1, env$attempt] <<- env$loss
|
||||
error.history[env$epoch + 1, env$attempt] <<- env$error
|
||||
tau.history[env$epoch + 1, env$attempt] <<- env$tau
|
||||
# Compute true error by comparing to the true `B`
|
||||
B.est <- null(env$V) # Function provided by CVE
|
||||
P.est <- B.est %*% solve(t(B.est) %*% B.est) %*% t(B.est)
|
||||
true.error <- norm(P - P.est, 'F') / sqrt(2 * k)
|
||||
true.error.history[env$epoch + 1, env$attempt] <<- true.error
|
||||
}
|
||||
# Performe SDR for ONE `k`.
|
||||
dr <- cve(Y ~ X, k = k, logger = logger, epochs = epochs, attempts = attempts)
|
||||
# Plot history's
|
||||
par(mfrow = c(2, 2))
|
||||
matplot(loss.history, type = 'l', log = 'y', xlab = 'iter',
|
||||
main = 'loss', ylab = expression(L(V[iter])))
|
||||
matplot(error.history, type = 'l', log = 'y', xlab = 'iter',
|
||||
main = 'error', ylab = 'error')
|
||||
matplot(tau.history, type = 'l', log = 'y', xlab = 'iter',
|
||||
main = 'tau', ylab = 'tau')
|
||||
matplot(true.error.history, type = 'l', log = 'y', xlab = 'iter',
|
||||
main = 'true error', ylab = 'true error')
|
|
@ -0,0 +1,89 @@
|
|||
# Usage:
|
||||
# ~$ Rscript runtime_test.R
|
||||
|
||||
library(CVEpureR) # load CVE
|
||||
|
||||
#' Writes progress to console.
|
||||
tell.user <- function(name, start.time, i, length) {
|
||||
cat("\rRunning Test (", name, "):",
|
||||
i, "/", length,
|
||||
" - elapsed:", format(Sys.time() - start.time), "\033[K")
|
||||
}
|
||||
subspace.dist <- function(B1, B2){
|
||||
P1 <- B1 %*% solve(t(B1) %*% B1) %*% t(B1)
|
||||
P2 <- B2 %*% solve(t(B2) %*% B2) %*% t(B2)
|
||||
return(norm(P1 - P2, type = 'F'))
|
||||
}
|
||||
|
||||
# Number of simulations
|
||||
SIM.NR <- 50
|
||||
# maximal number of iterations in curvilinear search algorithm
|
||||
MAXIT <- 50
|
||||
# number of arbitrary starting values for curvilinear optimization
|
||||
ATTEMPTS <- 10
|
||||
# set names of datasets
|
||||
dataset.names <- c("M1", "M2", "M3", "M4", "M5")
|
||||
# Set used CVE method
|
||||
methods <- c("simple") # c("legacy", "simple", "sgd", "linesearch")
|
||||
|
||||
# Setup error and time tracking variables
|
||||
error <- matrix(NA, SIM.NR, length(methods) * length(dataset.names))
|
||||
time <- matrix(NA, SIM.NR, ncol(error))
|
||||
colnames(error) <- kronecker(paste0(dataset.names, '-'), methods, paste0)
|
||||
colnames(time) <- colnames(error)
|
||||
|
||||
# only for telling user (to stdout)
|
||||
count <- 0
|
||||
start.time <- Sys.time()
|
||||
# Start simulation loop.
|
||||
for (sim in 1:SIM.NR) {
|
||||
# Repeat for each dataset.
|
||||
for (name in dataset.names) {
|
||||
count <- count + 1
|
||||
tell.user(name, start.time, count, SIM.NR * length(dataset.names))
|
||||
|
||||
# Create a new dataset
|
||||
ds <- dataset(name)
|
||||
# Prepare X, Y and combine to data matrix
|
||||
Y <- ds$Y
|
||||
X <- ds$X
|
||||
data <- cbind(Y, X)
|
||||
# get dimensions
|
||||
dim <- ncol(X)
|
||||
truedim <- ncol(ds$B)
|
||||
|
||||
for (method in methods) {
|
||||
dr.time <- system.time(
|
||||
dr <- cve.call(X, Y,
|
||||
method = method,
|
||||
k = truedim,
|
||||
attempts = ATTEMPTS
|
||||
)
|
||||
)
|
||||
dr <- dr[[truedim]]
|
||||
|
||||
key <- paste0(name, '-', method)
|
||||
error[sim, key] <- subspace.dist(dr$B, ds$B) / sqrt(2 * truedim)
|
||||
time[sim, key] <- dr.time["elapsed"]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cat("\n\n## Time [sec] Means:\n")
|
||||
print(colMeans(time))
|
||||
cat("\n## Error Means:\n")
|
||||
print(colMeans(error))
|
||||
|
||||
at <- seq(ncol(error)) + rep(seq(ncol(error) / length(methods)) - 1, each = length(methods))
|
||||
boxplot(error,
|
||||
main = paste0("Error (Nr of simulations ", SIM.NR, ")"),
|
||||
ylab = "Error",
|
||||
las = 2,
|
||||
at = at
|
||||
)
|
||||
boxplot(time,
|
||||
main = paste0("Time (Nr of simulations ", SIM.NR, ")"),
|
||||
ylab = "Time [sec]",
|
||||
las = 2,
|
||||
at = at
|
||||
)
|
|
@ -0,0 +1,20 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/CVE.R
|
||||
\docType{package}
|
||||
\name{CVEpureR-package}
|
||||
\alias{CVEpureR}
|
||||
\alias{CVEpureR-package}
|
||||
\title{Conditional Variance Estimator (CVE)}
|
||||
\description{
|
||||
Conditional Variance Estimator for Sufficient Dimension
|
||||
Reduction
|
||||
}
|
||||
\details{
|
||||
TODO: And some details
|
||||
}
|
||||
\references{
|
||||
Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
|
||||
}
|
||||
\author{
|
||||
Loki
|
||||
}
|
|
@ -5,9 +5,10 @@
|
|||
\alias{cve.call}
|
||||
\title{Implementation of the CVE method.}
|
||||
\usage{
|
||||
cve(formula, data, method = "simple", ...)
|
||||
cve(formula, data, method = "simple", max.dim = 10, ...)
|
||||
|
||||
cve.call(X, Y, method = "simple", nObs = nrow(X)^0.5, k, ...)
|
||||
cve.call(X, Y, method = "simple", nObs = nrow(X)^0.5, min.dim = 1,
|
||||
max.dim = 10, k, ...)
|
||||
}
|
||||
\arguments{
|
||||
\item{formula}{Formel for the regression model defining `X`, `Y`.
|
||||
|
|
|
@ -7,7 +7,7 @@ conditions.}
|
|||
\usage{
|
||||
cve_linesearch(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 1,
|
||||
tol = 0.001, rho1 = 0.1, rho2 = 0.9, slack = 0, epochs = 50L,
|
||||
attempts = 10L, max.linesearch.iter = 10L)
|
||||
attempts = 10L, max.linesearch.iter = 10L, logger = NULL)
|
||||
}
|
||||
\description{
|
||||
Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe
|
||||
|
|
|
@ -6,7 +6,8 @@
|
|||
a classic GD method unsing no further tricks.}
|
||||
\usage{
|
||||
cve_sgd(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 0.01,
|
||||
tol = 0.001, epochs = 50L, batch.size = 16L, attempts = 10L)
|
||||
tol = 0.001, epochs = 50L, batch.size = 16L, attempts = 10L,
|
||||
logger = NULL)
|
||||
}
|
||||
\description{
|
||||
Simple implementation of the CVE method. 'Simple' means that this method is
|
||||
|
|
|
@ -6,7 +6,8 @@
|
|||
a classic GD method unsing no further tricks.}
|
||||
\usage{
|
||||
cve_simple(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 1,
|
||||
tol = 0.001, slack = 0, epochs = 50L, attempts = 10L)
|
||||
tol = 0.001, slack = 0, epochs = 50L, attempts = 10L,
|
||||
logger = NULL)
|
||||
}
|
||||
\description{
|
||||
Simple implementation of the CVE method. 'Simple' means that this method is
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
% Please edit documentation in R/util.R
|
||||
\name{elem.pairs}
|
||||
\alias{elem.pairs}
|
||||
\title{Creates a (numeric) matrix where each row contains
|
||||
\title{Creates a (numeric) matrix where each column contains
|
||||
an element to element matching.}
|
||||
\usage{
|
||||
elem.pairs(elements)
|
||||
|
@ -11,10 +11,13 @@ elem.pairs(elements)
|
|||
\item{elements}{numeric vector of elements to match}
|
||||
}
|
||||
\value{
|
||||
matrix of size `(n * (n - 1) / 2, 2)` for a argument of lenght `n`.
|
||||
matrix of size `(2, n * (n - 1) / 2)` for a argument of lenght `n`.
|
||||
}
|
||||
\description{
|
||||
Creates a (numeric) matrix where each row contains
|
||||
Creates a (numeric) matrix where each column contains
|
||||
an element to element matching.
|
||||
}
|
||||
\examples{
|
||||
elem.pairs(seq.int(2, 5))
|
||||
}
|
||||
\keyword{internal}
|
||||
|
|
|
@ -1,27 +0,0 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/util.R
|
||||
\name{row.pair.apply}
|
||||
\alias{row.pair.apply}
|
||||
\alias{row.pair.apply,}
|
||||
\alias{col.pair.apply}
|
||||
\title{Apply function to pairs of matrix rows or columns.}
|
||||
\usage{
|
||||
row.pair.apply(X, FUN)
|
||||
|
||||
col.pair.apply(X, FUN)
|
||||
}
|
||||
\arguments{
|
||||
\item{X}{Matrix.}
|
||||
|
||||
\item{FUN}{Function to apply to each pair.}
|
||||
}
|
||||
\description{
|
||||
\code{row.pair.apply} applies \code{FUN} to each unique row pair without self
|
||||
interaction while \code{col.pair.apply} does the same for columns.
|
||||
}
|
||||
\examples{
|
||||
X <- matrix(seq(12), 4, 3)
|
||||
# matrix containing all row to row differences.
|
||||
row.pair.apply(X, `-`)
|
||||
}
|
||||
\keyword{internal}
|
109
notes.md
109
notes.md
|
@ -1,3 +1,12 @@
|
|||
# General Notes for Souce Code analysis
|
||||
## Search in multiple files.
|
||||
Using the Linux `grep` program with the parameters `-rnw` and specifying a include files filter like the following example.
|
||||
```bash
|
||||
grep --include=*\.{c,h,R} -rnw '.' -e "sweep"
|
||||
```
|
||||
searches in all `C` source and header fils as well as `R` source files for the term _sweep_.
|
||||
|
||||
# Development
|
||||
## Build and install.
|
||||
To build the package the `devtools` package is used. This also provides `roxygen2` which is used for documentation and authomatic creaton of the `NAMESPACE` file.
|
||||
```R
|
||||
|
@ -29,6 +38,55 @@ install.packages(path, repos = NULL, type = "source")
|
|||
```
|
||||
**Note: I only recommend this approach during development.**
|
||||
|
||||
# Analysing
|
||||
## Logging (a `cve` run).
|
||||
To log `loss`, `error` (estimated) the true error (error of current estimated `B` against the true `B`) or even the stepsize one can use the `logger` parameter. A `logger` is a function that gets the current `environment` of the CVE optimization methods (__do not alter this environment, only read from it__). This can be used to create logs like in the following example.
|
||||
```R
|
||||
library(CVE)
|
||||
|
||||
# Setup histories.
|
||||
(epochs <- 50)
|
||||
(attempts <- 10)
|
||||
loss.history <- matrix(NA, epochs + 1, attempts)
|
||||
error.history <- matrix(NA, epochs + 1, attempts)
|
||||
tau.history <- matrix(NA, epochs + 1, attempts)
|
||||
true.error.history <- matrix(NA, epochs + 1, attempts)
|
||||
|
||||
# Create a dataset
|
||||
ds <- dataset("M1")
|
||||
X <- ds$X
|
||||
Y <- ds$Y
|
||||
B <- ds$B # the true `B`
|
||||
(k <- ncol(ds$B))
|
||||
|
||||
# True projection matrix.
|
||||
P <- B %*% solve(t(B) %*% B) %*% t(B)
|
||||
# Define the logger for the `cve()` method.
|
||||
logger <- function(env) {
|
||||
# Note the `<<-` assignement!
|
||||
loss.history[env$epoch + 1, env$attempt] <<- env$loss
|
||||
error.history[env$epoch + 1, env$attempt] <<- env$error
|
||||
tau.history[env$epoch + 1, env$attempt] <<- env$tau
|
||||
# Compute true error by comparing to the true `B`
|
||||
B.est <- null(env$V) # Function provided by CVE
|
||||
P.est <- B.est %*% solve(t(B.est) %*% B.est) %*% t(B.est)
|
||||
true.error <- norm(P - P.est, 'F') / sqrt(2 * k)
|
||||
true.error.history[env$epoch + 1, env$attempt] <<- true.error
|
||||
}
|
||||
# Performa SDR
|
||||
dr <- cve(Y ~ X, k = k, logger = logger, epochs = epochs, attempts = attempts)
|
||||
# Plot history's
|
||||
par(mfrow = c(2, 2))
|
||||
matplot(loss.history, type = 'l', log = 'y', xlab = 'iter',
|
||||
main = 'loss', ylab = expression(L(V[iter])))
|
||||
matplot(error.history, type = 'l', log = 'y', xlab = 'iter',
|
||||
main = 'error', ylab = 'error')
|
||||
matplot(tau.history, type = 'l', log = 'y', xlab = 'iter',
|
||||
main = 'tau', ylab = 'tau')
|
||||
matplot(true.error.history, type = 'l', log = 'y', xlab = 'iter',
|
||||
main = 'true error', ylab = 'true error')
|
||||
```
|
||||
|
||||
## Reading log files.
|
||||
The runtime tests (upcomming further tests) are creating log files saved in `tmp/`. These log files are `CSV` files (actualy `TSV`) with a header storing the test results. Depending on the test the files may contain differnt data. As an example we use the runtime test logs which store in each line the `dataset`, the used `method` as well as the `error` (actual error of estimated `B` against real `B`) and the `time`. For reading and analysing the data see the following example.
|
||||
```R
|
||||
|
@ -66,7 +124,9 @@ jedi.seeks <- function() {
|
|||
}
|
||||
|
||||
trooper.seeks()
|
||||
# [1] "These aren't the droids you're looking for."
|
||||
jedi.seeks()
|
||||
# [1] "R2-D2", "C-3PO"
|
||||
```
|
||||
|
||||
The next example ilustrates how to write (without local copies) to variables outside the functions local environment.
|
||||
|
@ -127,27 +187,25 @@ library(microbenchmark)
|
|||
|
||||
A <- matrix(runif(120), 12, 10)
|
||||
|
||||
# Matrix trace.
|
||||
tr <- function(M) sum(diag(M))
|
||||
|
||||
# Check correctnes and benckmark performance.
|
||||
stopifnot(
|
||||
all.equal(
|
||||
tr(t(A) %*% A),
|
||||
sum(diag(t(A) %*% A)),
|
||||
sum(A * A)
|
||||
sum(diag(t(A) %*% A)), sum(diag(crossprod(A, A)))
|
||||
),
|
||||
all.equal(
|
||||
sum(diag(t(A) %*% A)), sum(A * A)
|
||||
)
|
||||
)
|
||||
microbenchmark(
|
||||
tr(t(A) %*% A),
|
||||
sum(diag(t(A) %*% A)),
|
||||
sum(A * A)
|
||||
MM = sum(diag(t(A) %*% A)),
|
||||
cross = sum(diag(crossprod(A, A))),
|
||||
elem = sum(A * A)
|
||||
)
|
||||
# Unit: nanoseconds
|
||||
# expr min lq mean median uq max neval
|
||||
# tr(t(A) %*% A) 4335 4713 5076.36 4949.5 5402.5 7928 100
|
||||
# sum(diag(t(A) %*% A)) 4106 4429 5233.89 4733.5 5057.5 49308 100
|
||||
# sum(A * A) 540 681 777.07 740.0 818.5 3572 100
|
||||
# expr min lq mean median uq max neval
|
||||
# MM 4232 4570.0 5138.81 4737 4956.0 40308 100
|
||||
# cross 2523 2774.5 2974.93 2946 3114.5 5078 100
|
||||
# elem 582 762.5 973.02 834 964.0 12945 100
|
||||
```
|
||||
|
||||
```R
|
||||
|
@ -243,6 +301,31 @@ microbenchmark(
|
|||
# outer 1141.479 1216.929 1404.702 1317.7315 1582.800 2531.766 100
|
||||
```
|
||||
|
||||
### Fast dist matrix computation (aka. row sum of squares).
|
||||
```R
|
||||
library(microbenchmark)
|
||||
library(CVE)
|
||||
|
||||
(n <- 200)
|
||||
(N <- n * (n - 1) / 2)
|
||||
(p <- 12)
|
||||
M <- matrix(runif(N * p), N, p)
|
||||
|
||||
stopifnot(
|
||||
all.equal(rowSums(M^2), rowSums.c(M^2)),
|
||||
all.equal(rowSums(M^2), rowSquareSums.c(M))
|
||||
)
|
||||
microbenchmark(
|
||||
sums = rowSums(M^2),
|
||||
sums.c = rowSums.c(M^2),
|
||||
sqSums.c = rowSquareSums.c(M)
|
||||
)
|
||||
# Unit: microseconds
|
||||
# expr min lq mean median uq max neval
|
||||
# sums 666.311 1051.036 1612.3100 1139.0065 1547.657 13940.97 100
|
||||
# sums.c 342.647 672.453 1009.9109 740.6255 1224.715 13765.90 100
|
||||
# sqSums.c 115.325 142.128 175.6242 153.4645 169.678 759.87 100
|
||||
```
|
||||
|
||||
## Using `Rprof()` for performance.
|
||||
The standart method for profiling where an algorithm is spending its time is with `Rprof()`.
|
||||
|
|
|
@ -14,7 +14,7 @@ subspace.dist <- function(B1, B2){
|
|||
}
|
||||
|
||||
# Number of simulations
|
||||
SIM.NR <- 50
|
||||
SIM.NR <- 20
|
||||
# maximal number of iterations in curvilinear search algorithm
|
||||
MAXIT <- 50
|
||||
# number of arbitrary starting values for curvilinear optimization
|
||||
|
@ -22,7 +22,7 @@ ATTEMPTS <- 10
|
|||
# set names of datasets
|
||||
dataset.names <- c("M1", "M2", "M3", "M4", "M5")
|
||||
# Set used CVE method
|
||||
methods <- c("simple") #, "sgd") # "legacy"
|
||||
methods <- c("simple") # c("legacy", "simple", "sgd", "linesearch")
|
||||
|
||||
library(CVE) # load CVE
|
||||
if ("legacy" %in% methods) {
|
||||
|
@ -83,6 +83,7 @@ for (sim in 1:SIM.NR) {
|
|||
attempts = ATTEMPTS
|
||||
)
|
||||
)
|
||||
dr <- dr[[truedim]]
|
||||
}
|
||||
|
||||
key <- paste0(name, '-', method)
|
||||
|
|
|
@ -0,0 +1,329 @@
|
|||
library(microbenchmark)
|
||||
|
||||
dyn.load("wip.so")
|
||||
|
||||
|
||||
## rowSum* .call --------------------------------------------------------------
|
||||
rowSums.c <- function(M) {
|
||||
stopifnot(
|
||||
is.matrix(M),
|
||||
is.numeric(M)
|
||||
)
|
||||
if (!is.double(M)) {
|
||||
M <- matrix(as.double(M), nrow = nrow(M))
|
||||
}
|
||||
|
||||
.Call('R_rowSums', PACKAGE = 'wip', M)
|
||||
}
|
||||
colSums.c <- function(M) {
|
||||
stopifnot(
|
||||
is.matrix(M),
|
||||
is.numeric(M)
|
||||
)
|
||||
if (!is.double(M)) {
|
||||
M <- matrix(as.double(M), nrow = nrow(M))
|
||||
}
|
||||
|
||||
.Call('R_colSums', PACKAGE = 'wip', M)
|
||||
}
|
||||
rowSquareSums.c <- function(M) {
|
||||
stopifnot(
|
||||
is.matrix(M),
|
||||
is.numeric(M)
|
||||
)
|
||||
if (!is.double(M)) {
|
||||
M <- matrix(as.double(M), nrow = nrow(M))
|
||||
}
|
||||
|
||||
.Call('R_rowSquareSums', PACKAGE = 'wip', M)
|
||||
}
|
||||
rowSumsSymVec.c <- function(vecA, nrow, diag = 0.0) {
|
||||
stopifnot(
|
||||
is.vector(vecA),
|
||||
is.numeric(vecA),
|
||||
is.numeric(diag),
|
||||
nrow * (nrow - 1) == length(vecA) * 2
|
||||
)
|
||||
if (!is.double(vecA)) {
|
||||
vecA <- as.double(vecA)
|
||||
}
|
||||
.Call('R_rowSumsSymVec', PACKAGE = 'wip',
|
||||
vecA, as.integer(nrow), as.double(diag))
|
||||
}
|
||||
rowSweep.c <- function(A, v, op = '-') {
|
||||
stopifnot(
|
||||
is.matrix(A),
|
||||
is.numeric(v)
|
||||
)
|
||||
if (!is.double(A)) {
|
||||
A <- matrix(as.double(A), nrow = nrow(A))
|
||||
}
|
||||
if (!is.vector(v) || !is.double(v)) {
|
||||
v <- as.double(v)
|
||||
}
|
||||
stopifnot(
|
||||
nrow(A) == length(v),
|
||||
op %in% c('+', '-', '*', '/')
|
||||
)
|
||||
|
||||
.Call('R_rowSweep', PACKAGE = 'wip', A, v, op)
|
||||
}
|
||||
|
||||
## row*, col* tests ------------------------------------------------------------
|
||||
n <- 3000
|
||||
M <- matrix(runif(n * 12), n, 12)
|
||||
stopifnot(
|
||||
all.equal(rowSums(M^2), rowSums.c(M^2)),
|
||||
all.equal(colSums(M), colSums.c(M))
|
||||
)
|
||||
microbenchmark(
|
||||
rowSums = rowSums(M^2),
|
||||
rowSums.c = rowSums.c(M^2),
|
||||
rowSqSums.c = rowSquareSums.c(M)
|
||||
)
|
||||
microbenchmark(
|
||||
colSums = colSums(M),
|
||||
colSums.c = colSums.c(M)
|
||||
)
|
||||
|
||||
sum = rowSums(M)
|
||||
stopifnot(all.equal(
|
||||
sweep(M, 1, sum, FUN = `/`),
|
||||
rowSweep.c(M, sum, '/') # Col-Normalize)
|
||||
), all.equal(
|
||||
sweep(M, 1, sum, FUN = `/`),
|
||||
M / sum
|
||||
))
|
||||
microbenchmark(
|
||||
sweep = sweep(M, 1, sum, FUN = `/`),
|
||||
M / sum,
|
||||
rowSweep.c = rowSweep.c(M, sum, '/') # Col-Normalize)
|
||||
)
|
||||
|
||||
# Create symmetric matrix with constant diagonal entries.
|
||||
nrow <- 200
|
||||
diag <- 1.0
|
||||
Sym <- tcrossprod(runif(nrow))
|
||||
diag(Sym) <- diag
|
||||
# Get vectorized lower triangular part of `Sym` matrix.
|
||||
SymVec <- Sym[lower.tri(Sym)]
|
||||
stopifnot(all.equal(
|
||||
rowSums(Sym),
|
||||
rowSumsSymVec.c(SymVec, nrow, diag)
|
||||
))
|
||||
microbenchmark(
|
||||
rowSums = rowSums(Sym),
|
||||
rowSums.c = rowSums.c(Sym),
|
||||
rowSumsSymVec.c = rowSumsSymVec.c(SymVec, nrow, diag)
|
||||
)
|
||||
|
||||
## Matrix-Matrix opperation .call ---------------------------------------------
|
||||
matrixprod.c <- function(A, B) {
|
||||
stopifnot(
|
||||
is.matrix(A), is.numeric(A),
|
||||
is.matrix(B), is.numeric(B),
|
||||
ncol(A) == nrow(B)
|
||||
)
|
||||
if (!is.double(A)) {
|
||||
A <- matrix(as.double(A), nrow = nrow(A))
|
||||
}
|
||||
if (!is.double(B)) {
|
||||
B <- matrix(as.double(B), nrow = nrow(B))
|
||||
}
|
||||
|
||||
.Call('R_matrixprod', PACKAGE = 'wip', A, B)
|
||||
}
|
||||
crossprod.c <- function(A, B) {
|
||||
stopifnot(
|
||||
is.matrix(A), is.numeric(A),
|
||||
is.matrix(B), is.numeric(B),
|
||||
nrow(A) == nrow(B)
|
||||
)
|
||||
if (!is.double(A)) {
|
||||
A <- matrix(as.double(A), nrow = nrow(A))
|
||||
}
|
||||
if (!is.double(B)) {
|
||||
B <- matrix(as.double(B), nrow = nrow(B))
|
||||
}
|
||||
|
||||
.Call('R_crossprod', PACKAGE = 'wip', A, B)
|
||||
}
|
||||
skewSymRank2k.c <- function(A, B, alpha = 1, beta = 0) {
|
||||
stopifnot(
|
||||
is.matrix(A), is.numeric(A),
|
||||
is.matrix(B), is.numeric(B),
|
||||
all(dim(A) == dim(B)),
|
||||
is.numeric(alpha), length(alpha) == 1L,
|
||||
is.numeric(beta), length(beta) == 1L
|
||||
)
|
||||
if (!is.double(A)) {
|
||||
A <- matrix(as.double(A), nrow = nrow(A))
|
||||
}
|
||||
if (!is.double(B)) {
|
||||
B <- matrix(as.double(B), nrow = nrow(B))
|
||||
}
|
||||
|
||||
.Call('R_skewSymRank2k', PACKAGE = 'wip', A, B,
|
||||
as.double(alpha), as.double(beta))
|
||||
}
|
||||
|
||||
## Matrix-Matrix opperation tests ---------------------------------------------
|
||||
n <- 200
|
||||
k <- 100
|
||||
m <- 300
|
||||
|
||||
A <- matrix(runif(n * k), n, k)
|
||||
B <- matrix(runif(k * m), k, m)
|
||||
stopifnot(
|
||||
all.equal(A %*% B, matrixprod.c(A, B))
|
||||
)
|
||||
microbenchmark(
|
||||
"%*%" = A %*% B,
|
||||
matrixprod.c = matrixprod.c(A, B)
|
||||
)
|
||||
|
||||
A <- matrix(runif(k * n), k, n)
|
||||
B <- matrix(runif(k * m), k, m)
|
||||
stopifnot(
|
||||
all.equal(crossprod(A, B), crossprod.c(A, B))
|
||||
)
|
||||
microbenchmark(
|
||||
crossprod = crossprod(A, B),
|
||||
crossprod.c = crossprod.c(A, B)
|
||||
)
|
||||
|
||||
n <- 12
|
||||
k <- 11
|
||||
A <- matrix(runif(n * k), n, k)
|
||||
B <- matrix(runif(n * k), n, k)
|
||||
stopifnot(all.equal(
|
||||
A %*% t(B) - B %*% t(A), skewSymRank2k.c(A, B)
|
||||
))
|
||||
microbenchmark(
|
||||
A %*% t(B) - B %*% t(A),
|
||||
skewSymRank2k.c(A, B)
|
||||
)
|
||||
|
||||
## Orthogonal projection onto null space .Call --------------------------------
|
||||
nullProj.c <- function(B) {
|
||||
stopifnot(
|
||||
is.matrix(B), is.numeric(B)
|
||||
)
|
||||
if (!is.double(B)) {
|
||||
B <- matrix(as.double(B), nrow = nrow(B))
|
||||
}
|
||||
|
||||
.Call('R_nullProj', PACKAGE = 'wip', B)
|
||||
}
|
||||
## Orthogonal projection onto null space tests --------------------------------
|
||||
p <- 12
|
||||
q <- 10
|
||||
V <- qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q)))
|
||||
|
||||
# Projection matrix onto `span(V)`
|
||||
Q <- diag(1, p) - tcrossprod(V, V)
|
||||
stopifnot(
|
||||
all.equal(Q, nullProj.c(V))
|
||||
)
|
||||
microbenchmark(
|
||||
nullProj = diag(1, p) - tcrossprod(V, V),
|
||||
nullProj.c = nullProj.c(V)
|
||||
)
|
||||
|
||||
|
||||
# ## WIP for gradient. ----------------------------------------------------------
|
||||
|
||||
gradient.c <- function(X, X_diff, Y, V, h) {
|
||||
stopifnot(
|
||||
is.matrix(X), is.double(X),
|
||||
is.matrix(X_diff), is.double(X_diff),
|
||||
ncol(X_diff) == ncol(X), nrow(X_diff) == nrow(X) * (nrow(X) - 1) / 2,
|
||||
is.vector(Y) || (is.matrix(Y) && pmin(dim(Y)) == 1L), is.double(Y),
|
||||
length(Y) == nrow(X),
|
||||
is.matrix(V), is.double(V),
|
||||
nrow(V) == ncol(X),
|
||||
is.vector(h), is.numeric(h), length(h) == 1
|
||||
)
|
||||
|
||||
.Call('R_gradient', PACKAGE = 'wip',
|
||||
X, X_diff, as.double(Y), V, as.double(h));
|
||||
}
|
||||
|
||||
elem.pairs <- function(elements) {
|
||||
# Number of elements to match.
|
||||
n <- length(elements)
|
||||
# Create all combinations.
|
||||
pairs <- rbind(rep(elements, each=n), rep(elements, n))
|
||||
# Select unique combinations without self interaction.
|
||||
return(pairs[, pairs[1, ] < pairs[2, ]])
|
||||
}
|
||||
grad <- function(X, Y, V, h, persistent = TRUE) {
|
||||
n <- nrow(X)
|
||||
p <- ncol(X)
|
||||
|
||||
if (!persistent) {
|
||||
pair.index <- elem.pairs(seq(n))
|
||||
i <- pair.index[, 1] # `i` indices of `(i, j)` pairs
|
||||
j <- pair.index[, 2] # `j` indices of `(i, j)` pairs
|
||||
lower <- ((i - 1) * n) + j
|
||||
upper <- ((j - 1) * n) + i
|
||||
X_diff <- X[i, , drop = F] - X[j, , drop = F]
|
||||
}
|
||||
|
||||
# Projection matrix onto `span(V)`
|
||||
Q <- diag(1, p) - tcrossprod(V, V)
|
||||
# Vectorized distance matrix `D`.
|
||||
vecD <- rowSums((X_diff %*% Q)^2)
|
||||
|
||||
|
||||
# Weight matrix `W` (dnorm ... gaussean density function)
|
||||
W <- matrix(1, n, n) # `exp(0) == 1`
|
||||
W[lower] <- exp((-0.5 / h) * vecD^2) # Set lower tri. part
|
||||
W[upper] <- t.default(W)[upper] # Mirror lower tri. to upper
|
||||
W <- sweep(W, 2, colSums(W), FUN = `/`) # Col-Normalize
|
||||
|
||||
# Weighted `Y` momentums
|
||||
y1 <- Y %*% W # Result is 1D -> transposition irrelevant
|
||||
y2 <- Y^2 %*% W
|
||||
|
||||
# Per example loss `L(V, X_i)`
|
||||
L <- y2 - y1^2
|
||||
|
||||
# Vectorized Weights with forced symmetry
|
||||
vecS <- (L[i] - (Y[j] - y1[i])^2) * W[lower]
|
||||
vecS <- vecS + ((L[j] - (Y[i] - y1[j])^2) * W[upper])
|
||||
# Compute scaling of `X` row differences
|
||||
vecS <- vecS * vecD
|
||||
|
||||
G <- crossprod(X_diff, X_diff * vecS) %*% V
|
||||
G <- (-2 / (n * h^2)) * G
|
||||
return(G)
|
||||
}
|
||||
rStiefl <- function(p, q) {
|
||||
return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))))
|
||||
}
|
||||
|
||||
n <- 200
|
||||
p <- 12
|
||||
q <- 10
|
||||
|
||||
X <- matrix(runif(n * p), n, p)
|
||||
Y <- runif(n)
|
||||
V <- rStiefl(p, q)
|
||||
h <- 0.1
|
||||
|
||||
pair.index <- elem.pairs(seq(n))
|
||||
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
|
||||
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
|
||||
lower <- ((i - 1) * n) + j
|
||||
upper <- ((j - 1) * n) + i
|
||||
X_diff <- X[i, , drop = F] - X[j, , drop = F]
|
||||
|
||||
stopifnot(all.equal(
|
||||
grad(X, Y, V, h),
|
||||
gradient.c(X, X_diff, Y, V, h)
|
||||
))
|
||||
microbenchmark(
|
||||
grad = grad(X, Y, V, h),
|
||||
gradient.c = gradient.c(X, X_diff, Y, V, h)
|
||||
)
|
|
@ -0,0 +1,421 @@
|
|||
#include <stdlib.h>
|
||||
|
||||
#include <R_ext/BLAS.h>
|
||||
#include <R_ext/Lapack.h>
|
||||
#include <R_ext/Error.h>
|
||||
// #include <Rmath.h>
|
||||
|
||||
#include "wip.h"
|
||||
|
||||
static inline void rowSums(const double *A,
|
||||
const int nrow, const int ncol,
|
||||
double *sum) {
|
||||
int i, j, block_size, block_size_i;
|
||||
const double *A_block = A;
|
||||
const double *A_end = A + nrow * ncol;
|
||||
|
||||
if (nrow > CVE_MEM_CHUNK_SIZE) {
|
||||
block_size = CVE_MEM_CHUNK_SIZE;
|
||||
} else {
|
||||
block_size = nrow;
|
||||
}
|
||||
|
||||
// Iterate `(block_size_i, ncol)` submatrix blocks.
|
||||
for (i = 0; i < nrow; i += block_size_i) {
|
||||
// Reset `A` to new block beginning.
|
||||
A = A_block;
|
||||
// Take block size of eveything left and reduce to max size.
|
||||
block_size_i = nrow - i;
|
||||
if (block_size_i > block_size) {
|
||||
block_size_i = block_size;
|
||||
}
|
||||
// Compute first blocks column,
|
||||
for (j = 0; j < block_size_i; ++j) {
|
||||
sum[j] = A[j];
|
||||
}
|
||||
// and sum the following columns to the first one.
|
||||
for (A += nrow; A < A_end; A += nrow) {
|
||||
for (j = 0; j < block_size_i; ++j) {
|
||||
sum[j] += A[j];
|
||||
}
|
||||
}
|
||||
// Step one block forth.
|
||||
A_block += block_size_i;
|
||||
sum += block_size_i;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void colSums(const double *A,
|
||||
const int nrow, const int ncol,
|
||||
double *sum) {
|
||||
int j;
|
||||
double *sum_end = sum + ncol;
|
||||
|
||||
memset(sum, 0, sizeof(double) * ncol);
|
||||
for (; sum < sum_end; ++sum) {
|
||||
for (j = 0; j < nrow; ++j) {
|
||||
*sum += A[j];
|
||||
}
|
||||
A += nrow;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void rowSquareSums(const double *A,
|
||||
const int nrow, const int ncol,
|
||||
double *sum) {
|
||||
int i, j, block_size, block_size_i;
|
||||
const double *A_block = A;
|
||||
const double *A_end = A + nrow * ncol;
|
||||
|
||||
if (nrow < CVE_MEM_CHUNK_SIZE) {
|
||||
block_size = nrow;
|
||||
} else {
|
||||
block_size = CVE_MEM_CHUNK_SIZE;
|
||||
}
|
||||
|
||||
// Iterate `(block_size_i, ncol)` submatrix blocks.
|
||||
for (i = 0; i < nrow; i += block_size_i) {
|
||||
// Reset `A` to new block beginning.
|
||||
A = A_block;
|
||||
// Take block size of eveything left and reduce to max size.
|
||||
block_size_i = nrow - i;
|
||||
if (block_size_i > block_size) {
|
||||
block_size_i = block_size;
|
||||
}
|
||||
// Compute first blocks column,
|
||||
for (j = 0; j < block_size_i; ++j) {
|
||||
sum[j] = A[j] * A[j];
|
||||
}
|
||||
// and sum the following columns to the first one.
|
||||
for (A += nrow; A < A_end; A += nrow) {
|
||||
for (j = 0; j < block_size_i; ++j) {
|
||||
sum[j] += A[j] * A[j];
|
||||
}
|
||||
}
|
||||
// Step one block forth.
|
||||
A_block += block_size_i;
|
||||
sum += block_size_i;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void rowSumsSymVec(const double *Avec, const int nrow,
|
||||
const double *diag,
|
||||
double *sum) {
|
||||
int i, j;
|
||||
|
||||
if (*diag == 0.0) {
|
||||
memset(sum, 0, nrow * sizeof(double));
|
||||
} else {
|
||||
for (i = 0; i < nrow; ++i) {
|
||||
sum[i] = *diag;
|
||||
}
|
||||
}
|
||||
|
||||
for (j = 0; j < nrow; ++j) {
|
||||
for (i = j + 1; i < nrow; ++i, ++Avec) {
|
||||
sum[j] += *Avec;
|
||||
sum[i] += *Avec;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* C[, j] = A[, j] * v for each j = 1 to ncol */
|
||||
static void rowSweep(const double *A, const int nrow, const int ncol,
|
||||
const char* op,
|
||||
const double *v, // vector of length nrow
|
||||
double *C) {
|
||||
int i, j, block_size, block_size_i;
|
||||
const double *A_block = A;
|
||||
double *C_block = C;
|
||||
const double *A_end = A + nrow * ncol;
|
||||
|
||||
if (nrow > CVE_MEM_CHUNK_SMALL) { // small because 3 vectors in cache
|
||||
block_size = CVE_MEM_CHUNK_SMALL;
|
||||
} else {
|
||||
block_size = nrow;
|
||||
}
|
||||
|
||||
if (*op == '+') {
|
||||
// Iterate `(block_size_i, ncol)` submatrix blocks.
|
||||
for (i = 0; i < nrow; i += block_size_i) {
|
||||
// Set `A` and `C` to block beginning.
|
||||
A = A_block;
|
||||
C = C_block;
|
||||
// Get current block's row size.
|
||||
block_size_i = nrow - i;
|
||||
if (block_size_i > block_size) {
|
||||
block_size_i = block_size;
|
||||
}
|
||||
// Perform element wise operation for block.
|
||||
for (; A < A_end; A += nrow, C += nrow) {
|
||||
for (j = 0; j < block_size_i; ++j) {
|
||||
C[j] = A[j] + v[j]; // FUN = '+'
|
||||
}
|
||||
}
|
||||
// Step one block forth.
|
||||
A_block += block_size_i;
|
||||
C_block += block_size_i;
|
||||
v += block_size_i;
|
||||
}
|
||||
} else if (*op == '-') {
|
||||
// Iterate `(block_size_i, ncol)` submatrix blocks.
|
||||
for (i = 0; i < nrow; i += block_size_i) {
|
||||
// Set `A` and `C` to block beginning.
|
||||
A = A_block;
|
||||
C = C_block;
|
||||
// Get current block's row size.
|
||||
block_size_i = nrow - i;
|
||||
if (block_size_i > block_size) {
|
||||
block_size_i = block_size;
|
||||
}
|
||||
// Perform element wise operation for block.
|
||||
for (; A < A_end; A += nrow, C += nrow) {
|
||||
for (j = 0; j < block_size_i; ++j) {
|
||||
C[j] = A[j] - v[j]; // FUN = '-'
|
||||
}
|
||||
}
|
||||
// Step one block forth.
|
||||
A_block += block_size_i;
|
||||
C_block += block_size_i;
|
||||
v += block_size_i;
|
||||
}
|
||||
} else if (*op == '*') {
|
||||
// Iterate `(block_size_i, ncol)` submatrix blocks.
|
||||
for (i = 0; i < nrow; i += block_size_i) {
|
||||
// Set `A` and `C` to block beginning.
|
||||
A = A_block;
|
||||
C = C_block;
|
||||
// Get current block's row size.
|
||||
block_size_i = nrow - i;
|
||||
if (block_size_i > block_size) {
|
||||
block_size_i = block_size;
|
||||
}
|
||||
// Perform element wise operation for block.
|
||||
for (; A < A_end; A += nrow, C += nrow) {
|
||||
for (j = 0; j < block_size_i; ++j) {
|
||||
C[j] = A[j] * v[j]; // FUN = '*'
|
||||
}
|
||||
}
|
||||
// Step one block forth.
|
||||
A_block += block_size_i;
|
||||
C_block += block_size_i;
|
||||
v += block_size_i;
|
||||
}
|
||||
} else if (*op == '/') {
|
||||
// Iterate `(block_size_i, ncol)` submatrix blocks.
|
||||
for (i = 0; i < nrow; i += block_size_i) {
|
||||
// Set `A` and `C` to block beginning.
|
||||
A = A_block;
|
||||
C = C_block;
|
||||
// Get current block's row size.
|
||||
block_size_i = nrow - i;
|
||||
if (block_size_i > block_size) {
|
||||
block_size_i = block_size;
|
||||
}
|
||||
// Perform element wise operation for block.
|
||||
for (; A < A_end; A += nrow, C += nrow) {
|
||||
for (j = 0; j < block_size_i; ++j) {
|
||||
C[j] = A[j] / v[j]; // FUN = '/'
|
||||
}
|
||||
}
|
||||
// Step one block forth.
|
||||
A_block += block_size_i;
|
||||
C_block += block_size_i;
|
||||
v += block_size_i;
|
||||
}
|
||||
} else {
|
||||
error("Got unknown 'op' (opperation) argument.");
|
||||
}
|
||||
}
|
||||
|
||||
static inline void matrixprod(const double *A, const int nrowA, const int ncolA,
|
||||
const double *B, const int nrowB, const int ncolB,
|
||||
double *C) {
|
||||
const double one = 1.0;
|
||||
const double zero = 0.0;
|
||||
|
||||
// DGEMM with parameterization:
|
||||
// C <- A %*% B
|
||||
F77_NAME(dgemm)("N", "N", &nrowA, &ncolB, &ncolA,
|
||||
&one, A, &nrowA, B, &nrowB,
|
||||
&zero, C, &nrowA);
|
||||
}
|
||||
|
||||
static inline void crossprod(const double *A, const int nrowA, const int ncolA,
|
||||
const double *B, const int nrowB, const int ncolB,
|
||||
double *C) {
|
||||
const double one = 1.0;
|
||||
const double zero = 0.0;
|
||||
|
||||
// DGEMM with parameterization:
|
||||
// C <- t(A) %*% B
|
||||
F77_NAME(dgemm)("T", "N", &ncolA, &ncolB, &nrowA,
|
||||
&one, A, &nrowA, B, &nrowB,
|
||||
&zero, C, &ncolA);
|
||||
}
|
||||
|
||||
static inline void nullProj(const double *B, const int nrowB, const int ncolB,
|
||||
double *Q) {
|
||||
const double minusOne = -1.0;
|
||||
const double one = 1.0;
|
||||
|
||||
// Initialize as identity matrix.
|
||||
memset(Q, 0, sizeof(double) * nrowB * nrowB);
|
||||
double *Q_diag, *Q_end = Q + nrowB * nrowB;
|
||||
for (Q_diag = Q; Q_diag < Q_end; Q_diag += nrowB + 1) {
|
||||
*Q_diag = 1.0;
|
||||
}
|
||||
|
||||
// DGEMM with parameterization:
|
||||
// C <- (-1.0 * B %*% t(B)) + C
|
||||
F77_NAME(dgemm)("N", "T", &nrowB, &nrowB, &ncolB,
|
||||
&minusOne, B, &nrowB, B, &nrowB,
|
||||
&one, Q, &nrowB);
|
||||
}
|
||||
|
||||
static inline void rangePairs(const int from, const int to, int *pairs) {
|
||||
int i, j;
|
||||
for (i = from; i < to; ++i) {
|
||||
for (j = i + 1; j < to; ++j) {
|
||||
pairs[0] = i;
|
||||
pairs[1] = j;
|
||||
pairs += 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// A dence skwe-symmetric rank 2 update.
|
||||
// Perform the update
|
||||
// C := alpha (A * B^T - B * A^T) + beta C
|
||||
static void skewSymRank2k(const int nrow, const int ncol,
|
||||
double alpha, const double *A, const double *B,
|
||||
double beta,
|
||||
double *C) {
|
||||
F77_NAME(dgemm)("N", "T",
|
||||
&nrow, &nrow, &ncol,
|
||||
&alpha, A, &nrow, B, &nrow,
|
||||
&beta, C, &nrow);
|
||||
|
||||
alpha *= -1.0;
|
||||
beta = 1.0;
|
||||
F77_NAME(dgemm)("N", "T",
|
||||
&nrow, &nrow, &ncol,
|
||||
&alpha, B, &nrow, A, &nrow,
|
||||
&beta, C, &nrow);
|
||||
}
|
||||
|
||||
// TODO: mutch potential for optimization!!!
|
||||
static inline void weightedYandLoss(const int n,
|
||||
const double *Y,
|
||||
const double *vecD,
|
||||
const double *vecW,
|
||||
const double *colSums,
|
||||
double *y1, double *L, double *vecS,
|
||||
double *const loss) {
|
||||
int i, j, k, N = n * (n - 1) / 2;
|
||||
double l;
|
||||
|
||||
for (i = 0; i < n; ++i) {
|
||||
y1[i] = Y[i] / colSums[i];
|
||||
L[i] = Y[i] * Y[i] / colSums[i];
|
||||
}
|
||||
|
||||
for (k = j = 0; j < n; ++j) {
|
||||
for (i = j + 1; i < n; ++i, ++k) {
|
||||
y1[i] += Y[j] * vecW[k] / colSums[i];
|
||||
y1[j] += Y[i] * vecW[k] / colSums[j];
|
||||
L[i] += Y[j] * Y[j] * vecW[k] / colSums[i];
|
||||
L[j] += Y[i] * Y[i] * vecW[k] / colSums[j];
|
||||
}
|
||||
}
|
||||
|
||||
l = 0.0;
|
||||
for (i = 0; i < n; ++i) {
|
||||
l += (L[i] -= y1[i] * y1[i]);
|
||||
}
|
||||
*loss = l / (double)n;
|
||||
|
||||
for (k = j = 0; j < n; ++j) {
|
||||
for (i = j + 1; i < n; ++i, ++k) {
|
||||
l = Y[j] - y1[i];
|
||||
vecS[k] = (L[i] - (l * l)) / colSums[i];
|
||||
l = Y[i] - y1[j];
|
||||
vecS[k] += (L[j] - (l * l)) / colSums[j];
|
||||
}
|
||||
}
|
||||
|
||||
for (k = 0; k < N; ++k) {
|
||||
vecS[k] *= vecW[k] * vecD[k];
|
||||
}
|
||||
}
|
||||
|
||||
inline double gaussKernel(const double x, const double scale) {
|
||||
return exp(scale * x * x);
|
||||
}
|
||||
|
||||
static void gradient(const int n, const int p, const int q,
|
||||
const double *X,
|
||||
const double *X_diff,
|
||||
const double *Y,
|
||||
const double *V,
|
||||
const double h,
|
||||
double *G, double *const loss) {
|
||||
// Number of X_i to X_j not trivial pairs.
|
||||
int i, N = (n * (n - 1)) / 2;
|
||||
double scale = -0.5 / h;
|
||||
const double one = 1.0;
|
||||
|
||||
if (X_diff == (void*)0) {
|
||||
// TODO: ...
|
||||
}
|
||||
|
||||
// Allocate and compute projection matrix `Q = I_p - V * V^T`
|
||||
double *Q = (double*)malloc(p * p * sizeof(double));
|
||||
nullProj(V, p, q, Q);
|
||||
|
||||
// allocate and compute vectorized distance matrix with a temporary
|
||||
// projection of `X_diff`.
|
||||
double *vecD = (double*)malloc(N * sizeof(double));
|
||||
double *X_proj;
|
||||
if (p < 5) { // TODO: refine that!
|
||||
X_proj = (double*)malloc(N * 5 * sizeof(double));
|
||||
} else {
|
||||
X_proj = (double*)malloc(N * p * sizeof(double));
|
||||
}
|
||||
matrixprod(X_diff, N, p, Q, p, p, X_proj);
|
||||
rowSquareSums(X_proj, N, p, vecD);
|
||||
|
||||
// Apply kernel to distence vector for weights computation.
|
||||
double *vecW = X_proj; // reuse memory area, no longer needed.
|
||||
for (i = 0; i < N; ++i) {
|
||||
vecW[i] = gaussKernel(vecD[i], scale);
|
||||
}
|
||||
double *colSums = X_proj + N; // still allocated!
|
||||
rowSumsSymVec(vecW, n, &one, colSums); // rowSums = colSums cause Sym
|
||||
|
||||
// compute weighted responces of first end second momontum, aka y1, y2.
|
||||
double *y1 = X_proj + N + n;
|
||||
double *L = X_proj + N + (2 * n);
|
||||
// Allocate X_diff scaling vector `vecS`, not in `X_proj` mem area because
|
||||
// used symultanious to X_proj in final gradient computation.
|
||||
double *vecS = (double*)malloc(N * sizeof(double));
|
||||
weightedYandLoss(n, Y, vecD, vecW, colSums, y1, L, vecS, loss);
|
||||
|
||||
// compute the gradient using X_proj for intermidiate scaled X_diff.
|
||||
rowSweep(X_diff, N, p, "*", vecS, X_proj);
|
||||
// reuse Q which has the required dim (p, p).
|
||||
crossprod(X_diff, N, p, X_proj, N, p, Q);
|
||||
// Product with V
|
||||
matrixprod(Q, p, p, V, p, q, G);
|
||||
// And final scaling (TODO: move into matrixprod!)
|
||||
scale = -2.0 / (((double)n) * h * h);
|
||||
N = p * q;
|
||||
for (i = 0; i < N; ++i) {
|
||||
G[i] *= scale;
|
||||
}
|
||||
|
||||
free(vecS);
|
||||
free(X_proj);
|
||||
free(vecD);
|
||||
free(Q);
|
||||
}
|
|
@ -0,0 +1,159 @@
|
|||
#ifndef _CVE_INCLUDE_GUARD_
|
||||
#define _CVE_INCLUDE_GUARD_
|
||||
|
||||
#include <Rinternals.h>
|
||||
|
||||
#define CVE_MEM_CHUNK_SMALL 1016
|
||||
#define CVE_MEM_CHUNK_SIZE 2032
|
||||
|
||||
static inline void rowSums(const double *A,
|
||||
const int nrow, const int ncol,
|
||||
double *sum);
|
||||
SEXP R_rowSums(SEXP A) {
|
||||
SEXP sums = PROTECT(allocVector(REALSXP, nrows(A)));
|
||||
|
||||
rowSums(REAL(A), nrows(A), ncols(A), REAL(sums));
|
||||
|
||||
UNPROTECT(1);
|
||||
return sums;
|
||||
}
|
||||
|
||||
static inline void colSums(const double *A,
|
||||
const int nrow, const int ncol,
|
||||
double *sum);
|
||||
SEXP R_colSums(SEXP A) {
|
||||
SEXP sums = PROTECT(allocVector(REALSXP, ncols(A)));
|
||||
|
||||
colSums(REAL(A), nrows(A), ncols(A), REAL(sums));
|
||||
|
||||
UNPROTECT(1);
|
||||
return sums;
|
||||
}
|
||||
|
||||
static inline void rowSquareSums(const double*, const int, const int, double*);
|
||||
SEXP R_rowSquareSums(SEXP A) {
|
||||
SEXP result = PROTECT(allocVector(REALSXP, nrows(A)));
|
||||
|
||||
rowSquareSums(REAL(A), nrows(A), ncols(A), REAL(result));
|
||||
|
||||
UNPROTECT(1);
|
||||
return result;
|
||||
}
|
||||
|
||||
static inline void rowSumsSymVec(const double *Avec, const int nrow,
|
||||
const double *diag,
|
||||
double *sum);
|
||||
SEXP R_rowSumsSymVec(SEXP Avec, SEXP nrow, SEXP diag) {
|
||||
SEXP sum = PROTECT(allocVector(REALSXP, *INTEGER(nrow)));
|
||||
|
||||
rowSumsSymVec(REAL(Avec), *INTEGER(nrow), REAL(diag), REAL(sum));
|
||||
|
||||
UNPROTECT(1);
|
||||
return sum;
|
||||
}
|
||||
|
||||
static void rowSweep(const double *A, const int nrow, const int ncol,
|
||||
const char* op,
|
||||
const double *v, // vector of length nrow
|
||||
double *C);
|
||||
SEXP R_rowSweep(SEXP A, SEXP v, SEXP op) {
|
||||
SEXP C = PROTECT(allocMatrix(REALSXP, nrows(A), ncols(A)));
|
||||
|
||||
rowSweep(REAL(A), nrows(A), ncols(A),
|
||||
CHAR(STRING_ELT(op, 0)),
|
||||
REAL(v), REAL(C));
|
||||
|
||||
UNPROTECT(1);
|
||||
return C;
|
||||
}
|
||||
|
||||
static inline void matrixprod(const double *A, const int nrowA, const int ncolA,
|
||||
const double *B, const int nrowB, const int ncolB,
|
||||
double *C);
|
||||
SEXP R_matrixprod(SEXP A, SEXP B) {
|
||||
SEXP C = PROTECT(allocMatrix(REALSXP, nrows(A), ncols(B)));
|
||||
|
||||
matrixprod(REAL(A), nrows(A), ncols(A),
|
||||
REAL(B), nrows(B), ncols(B),
|
||||
REAL(C));
|
||||
|
||||
UNPROTECT(1);
|
||||
return C;
|
||||
}
|
||||
|
||||
static inline void crossprod(const double* A, const int nrowA, const int ncolA,
|
||||
const double* B, const int ncolB, const int nrowB,
|
||||
double* C);
|
||||
SEXP R_crossprod(SEXP A, SEXP B) {
|
||||
SEXP C = PROTECT(allocMatrix(REALSXP, ncols(A), ncols(B)));
|
||||
|
||||
crossprod(REAL(A), nrows(A), ncols(A),
|
||||
REAL(B), nrows(B), ncols(B),
|
||||
REAL(C));
|
||||
|
||||
UNPROTECT(1);
|
||||
return C;
|
||||
}
|
||||
|
||||
static void skewSymRank2k(const int n, const int k,
|
||||
double alpha, const double *A, const double *B,
|
||||
double beta,
|
||||
double *C);
|
||||
SEXP R_skewSymRank2k(SEXP A, SEXP B, SEXP alpha, SEXP beta) {
|
||||
SEXP C = PROTECT(allocMatrix(REALSXP, nrows(A), nrows(A)));
|
||||
memset(REAL(C), 0, nrows(A) * nrows(A) * sizeof(double));
|
||||
|
||||
skewSymRank2k(nrows(A), ncols(A),
|
||||
*REAL(alpha), REAL(A), REAL(B),
|
||||
*REAL(beta), REAL(C));
|
||||
|
||||
UNPROTECT(1);
|
||||
return C;
|
||||
}
|
||||
|
||||
static inline void nullProj(const double* B, const int nrowB, const int ncolB,
|
||||
double* Q);
|
||||
SEXP R_nullProj(SEXP B) {
|
||||
SEXP Q = PROTECT(allocMatrix(REALSXP, nrows(B), nrows(B)));
|
||||
|
||||
nullProj(REAL(B), nrows(B), ncols(B), REAL(Q));
|
||||
|
||||
UNPROTECT(1);
|
||||
return Q;
|
||||
}
|
||||
|
||||
static inline void rangePairs(const int from, const int to, int *pairs);
|
||||
SEXP R_rangePairs(SEXP from, SEXP to) {
|
||||
int start = asInteger(from);
|
||||
int end = asInteger(to) + 1;
|
||||
int n = end - start;
|
||||
|
||||
SEXP out = PROTECT(allocMatrix(INTSXP, 2, n * (n - 1) / 2));
|
||||
rangePairs(start, end, INTEGER(out));
|
||||
|
||||
UNPROTECT(1);
|
||||
return out;
|
||||
}
|
||||
|
||||
static void gradient(const int n, const int p, const int q,
|
||||
const double *X,
|
||||
const double *X_diff,
|
||||
const double *Y,
|
||||
const double *V,
|
||||
const double h,
|
||||
double *G, double *const loss);
|
||||
SEXP R_gradient(SEXP X, SEXP X_diff, SEXP Y, SEXP V, SEXP h) {
|
||||
int N = (nrows(X) * (nrows(X) - 1)) / 2;
|
||||
|
||||
SEXP G = PROTECT(allocMatrix(REALSXP, nrows(V), ncols(V)));
|
||||
SEXP loss = PROTECT(allocVector(REALSXP, 1));
|
||||
|
||||
gradient(nrows(X), ncols(X), ncols(V),
|
||||
REAL(X), REAL(X_diff), REAL(Y), REAL(V), *REAL(h),
|
||||
REAL(G), REAL(loss));
|
||||
|
||||
UNPROTECT(2);
|
||||
return G;
|
||||
}
|
||||
|
||||
#endif /* _CVE_INCLUDE_GUARD_ */
|
Loading…
Reference in New Issue