wip: writing C gradient version

parent 47917fe0bd
commit 4d2651fe8a
@@ -1,88 +1,71 @@
-# -----------------------------------------------------------------------------
-# Program: runtime_test.R
-# Author: Loki
-# Date: 2019.08.12
-#
-# Purpose:
-# Comparing runtime of "MAVE" with "CVE".
-#
-# RevisionHistory:
-# Loki -- 2019.08.12 initial creation
-# -----------------------------------------------------------------------------
-
-# load CVE package
-library(CVE)
-# load MAVE package for comparison
-library(MAVE)
-
-# set nr of simulations per dataset
-nr.sim <- 10
-
-# set names of datasets to run tests on
-dataset.names <- c("M1", "M2", "M3", "M4", "M5")
-
-#' Orthogonal projection to sub-space spanned by `B`
-#'
-#' @param B Matrix
-#' @return Orthogonal Projection Matrix
-proj <- function(B) {
-    B %*% solve(t(B) %*% B) %*% t(B)
-}
-
-#' Compute nObs given dataset dimension \code{n}.
-#'
-#' @param n Number of samples
-#' @return Numeric estimate of \code{nObs}
-nObs <- function (n) { n^0.5 }
-
-## prepare "logging"
-# result error, time, ... data.frame's
-error <- matrix(nrow = nr.sim, ncol = 2 * length(dataset.names))
-time <- matrix(nrow = nr.sim, ncol = 2 * length(dataset.names))
-# convert to data.frames
-error <- as.data.frame(error)
-time <- as.data.frame(time)
-# set names
-names(error) <- kronecker(c("CVE.", "MAVE."), dataset.names, paste0)
-names(time) <- kronecker(c("CVE.", "MAVE."), dataset.names, paste0)
-
-# get current time
-start.time <- Sys.time()
-## main comparison loop (iterate `nr.sim` times for each dataset)
-for (i in seq_along(dataset.names)) {
-    for (j in 1:nr.sim) {
-        name <- dataset.names[i]
-        # reporting progress
-        cat("\rRunning Test (", name, j , "):",
-            (i - 1) * nr.sim + j, "/", length(dataset.names) * nr.sim,
-            " - Time since start:", format(Sys.time() - start.time), "\033[K")
-        # sample new dataset
-        ds <- dataset(name)
-        k <- ncol(ds$B) # real dim
-        data <- data.frame(X = ds$X, Y = ds$Y)
-        # call CVE
-        cve.time <- system.time(
-            cve.res <- cve(Y ~ .,
-                           data = data,
-                           method = "simple",
-                           k = k)
-        )
-        # call MAVE
-        mave.time <- system.time(
-            mave.res <- mave(Y ~ .,
-                             data = data,
-                             method = "meanMAVE")
-        )
-        # compute real and approximated sub-space projections
-        P <- proj(ds$B) # real
-        P.cve <- proj(cve.res$B)
-        P.mave <- proj(mave.res$dir[[k]])
-        # compute (and store) errors
-        error[j, paste0("CVE.", name)] <- norm(P - P.cve, 'F') / sqrt(2 * k)
-        error[j, paste0("MAVE.", name)] <- norm(P - P.mave, 'F') / sqrt(2 * k)
-        # store run-times
-        time[j, paste0("CVE.", name)] <- cve.time["elapsed"]
-        time[j, paste0("MAVE.", name)] <- mave.time["elapsed"]
-    }
-}
+# Usage:
+# ~$ Rscript runtime_test.R
+
+library(CVE) # load CVE
+
+#' Writes progress to console.
+tell.user <- function(name, start.time, i, length) {
+    cat("\rRunning Test (", name, "):",
+        i, "/", length,
+        " - elapsed:", format(Sys.time() - start.time), "\033[K")
+}
+subspace.dist <- function(B1, B2){
+    P1 <- B1 %*% solve(t(B1) %*% B1) %*% t(B1)
+    P2 <- B2 %*% solve(t(B2) %*% B2) %*% t(B2)
+    return(norm(P1 - P2, type = 'F'))
+}
+
+# Number of simulations
+SIM.NR <- 50
+# maximal number of iterations in curvilinear search algorithm
+MAXIT <- 50
+# number of arbitrary starting values for curvilinear optimization
+ATTEMPTS <- 10
+# set names of datasets
+dataset.names <- c("M1", "M2", "M3", "M4", "M5")
+# Set used CVE method
+methods <- c("simple") # c("legacy", "simple", "sgd", "linesearch")
+
+# Setup error and time tracking variables
+error <- matrix(NA, SIM.NR, length(methods) * length(dataset.names))
+time <- matrix(NA, SIM.NR, ncol(error))
+colnames(error) <- kronecker(paste0(dataset.names, '-'), methods, paste0)
+colnames(time) <- colnames(error)
+
+# only for telling user (to stdout)
+count <- 0
+start.time <- Sys.time()
+# Start simulation loop.
+for (sim in 1:SIM.NR) {
+    # Repeat for each dataset.
+    for (name in dataset.names) {
+        count <- count + 1
+        tell.user(name, start.time, count, SIM.NR * length(dataset.names))
+
+        # Create a new dataset
+        ds <- dataset(name)
+        # Prepare X, Y and combine to data matrix
+        Y <- ds$Y
+        X <- ds$X
+        data <- cbind(Y, X)
+        # get dimensions
+        dim <- ncol(X)
+        truedim <- ncol(ds$B)
+
+        for (method in methods) {
+            dr.time <- system.time(
+                dr <- cve.call(X, Y,
+                               method = method,
+                               k = truedim,
+                               attempts = ATTEMPTS
+                )
+            )
+            dr <- dr[[truedim]]
+
+            key <- paste0(name, '-', method)
+            error[sim, key] <- subspace.dist(dr$B, ds$B) / sqrt(2 * truedim)
+            time[sim, key] <- dr.time["elapsed"]
+        }
+    }
+}
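The error measure used above, `subspace.dist`, is the Frobenius distance between the orthogonal projections onto the column spaces of two bases; dividing by `sqrt(2 * truedim)` scales it to [0, 1] for equal-rank subspaces. A small standalone check of that behaviour (the matrices below are made up purely for illustration):

```R
subspace.dist <- function(B1, B2){
    P1 <- B1 %*% solve(t(B1) %*% B1) %*% t(B1)
    P2 <- B2 %*% solve(t(B2) %*% B2) %*% t(B2)
    return(norm(P1 - P2, type = 'F'))
}

B1 <- matrix(c(1, 0, 0, 0,
               0, 1, 0, 0), 4, 2)        # basis of a 2D subspace of R^4
B2 <- B1 %*% matrix(c(1, 1, 0, 1), 2, 2) # same subspace, different basis
B3 <- matrix(c(0, 0, 1, 0,
               0, 0, 0, 1), 4, 2)        # basis of the orthogonal complement

subspace.dist(B1, B2) / sqrt(2 * 2)  # ~0: identical subspaces
subspace.dist(B1, B3) / sqrt(2 * 2)  # 1:  orthogonal subspaces
```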
@@ -91,16 +74,16 @@ print(colMeans(time))
 cat("\n## Error Means:\n")
 print(colMeans(error))
 
-len <- length(dataset.names)
+at <- seq(ncol(error)) + rep(seq(ncol(error) / length(methods)) - 1, each = length(methods))
-boxplot(as.matrix(error),
-        main = paste0("Error (nr.sim = ", nr.sim, ")"),
-        ylab = expression(error == group("||", P[B] - P[hat(B)], "||")[F] / sqrt(2*k)),
+boxplot(error,
+        main = paste0("Error (Nr of simulations ", SIM.NR, ")"),
+        ylab = "Error",
         las = 2,
-        at = c(1:len, 1:len + len + 1)
+        at = at
 )
-boxplot(as.matrix(time),
-        main = paste0("Time (nr.sim = ", nr.sim, ")"),
-        ylab = "time [sec]",
+boxplot(time,
+        main = paste0("Time (Nr of simulations ", SIM.NR, ")"),
+        ylab = "Time [sec]",
         las = 2,
-        at = c(1:len, 1:len + len + 1)
+        at = at
 )
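The `at` positions simply group the per-method boxes by dataset, leaving one empty slot between datasets. For instance, with the five datasets and two hypothetical methods:

```R
dataset.names <- c("M1", "M2", "M3", "M4", "M5")
methods <- c("simple", "sgd")   # two methods, purely for illustration
n.col <- length(methods) * length(dataset.names)

at <- seq(n.col) + rep(seq(n.col / length(methods)) - 1, each = length(methods))
at
# [1]  1  2  4  5  7  8 10 11 13 14   -> method pairs per dataset, gap in between
```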
@@ -1,4 +1,4 @@
-Package: CVE
+Package: CVEpureR
 Type: Package
 Title: Conditional Variance Estimator for Sufficient Dimension Reduction
 Version: 0.1
@@ -2,7 +2,6 @@
 S3method(plot,cve)
 S3method(summary,cve)
-export(col.pair.apply)
 export(cve)
 export(cve.call)
 export(cve.grid.search)
@@ -15,7 +14,6 @@ export(estimate.bandwidth)
 export(grad)
 export(null)
 export(rStiefl)
-export(row.pair.apply)
 import(stats)
 importFrom(graphics,lines)
 importFrom(graphics,plot)
CVE_R/R/CVE.R | 124
@@ -1,3 +1,17 @@
+#' Conditional Variance Estimator (CVE)
+#'
+#' Conditional Variance Estimator for Sufficient Dimension
+#' Reduction
+#'
+#' TODO: And some details
+#'
+#'
+#' @references Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
+#'
+#' @docType package
+#' @author Loki
+"_PACKAGE"
+
 #' Implementation of the CVE method.
 #'
 #' Conditional Variance Estimator (CVE) is a novel sufficient dimension
@@ -40,7 +54,7 @@
 #' @import stats
 #' @importFrom stats model.frame
 #' @export
-cve <- function(formula, data, method = "simple", ...) {
+cve <- function(formula, data, method = "simple", max.dim = 10, ...) {
     # check for type of `data` if supplied and set default
     if (missing(data)) {
         data <- environment(formula)
@@ -69,12 +83,8 @@ cve <- function(formula, data, method = "simple", ...) {
 #' @param ... Method specific parameters.
 #' @rdname cve
 #' @export
-cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5, k, ...) {
-    # TODO: replace default value of `k` by `max.dim` when fast enough
-    if (missing(k)) {
-        stop("TODO: parameter `k` (rank(B)) is required, replace by `max.dim`.")
-    }
-
+cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5,
+                     min.dim = 1, max.dim = 10, k, ...) {
     # parameter checking
     if (!is.matrix(X)) {
@@ -90,21 +100,42 @@ cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5, k, ...) {
         stop('X is one dimensional, no need for dimension reduction.')
     }
 
+    if (!missing(k)) {
+        min.dim <- as.integer(k)
+        max.dim <- as.integer(k)
+    } else {
+        min.dim <- as.integer(min.dim)
+        max.dim <- as.integer(min(max.dim, ncol(X) - 1L))
+    }
+    if (min.dim > max.dim) {
+        stop('`min.dim` bigger `max.dim`.')
+    }
+    if (max.dim >= ncol(X)) {
+        stop('`max.dim` must be smaller than `ncol(X)`.')
+    }
+
     # Call specified method.
     method <- tolower(method)
-    if (method == 'simple') {
-        dr <- cve_simple(X, Y, k, nObs = nObs, ...)
-    } else if (method == 'linesearch') {
-        dr <- cve_linesearch(X, Y, k, nObs = nObs, ...)
-    } else if (method == 'sgd') {
-        dr <- cve_sgd(X, Y, k, nObs = nObs, ...)
-    } else {
-        stop('Got unknown method.')
+    call <- match.call()
+    dr <- list()
+    for (k in min.dim:max.dim) {
+        if (method == 'simple') {
+            dr.k <- cve_simple(X, Y, k, nObs = nObs, ...)
+        } else if (method == 'linesearch') {
+            dr.k <- cve_linesearch(X, Y, k, nObs = nObs, ...)
+        } else if (method == 'sgd') {
+            dr.k <- cve_sgd(X, Y, k, nObs = nObs, ...)
+        } else {
+            stop('Got unknown method.')
+        }
+        dr.k$k <- k
+        class(dr.k) <- "cve.k"
+        dr[[k]] <- dr.k
     }
 
     # augment result information
-    dr$method <- method
-    dr$call <- match.call()
+    dr.k$method <- method
+    dr.k$call <- call
     class(dr) <- "cve"
     return(dr)
 }
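Because `cve.call` now fits every dimension from `min.dim` to `max.dim` and stores the per-dimension results as `"cve.k"` objects in a list, a single dimension is picked out by list index, exactly as the runtime test does with `dr[[truedim]]`. A minimal usage sketch (the `dataset()` helper and the `$B` field are taken from their use elsewhere in this commit):

```R
library(CVEpureR)

ds <- dataset("M1")    # simulated dataset shipped with the package
k  <- ncol(ds$B)       # true reduction dimension

# Passing `k` forces min.dim == max.dim == k, so only one sub-model is fitted.
dr   <- cve.call(ds$X, ds$Y, method = "simple", k = k)
dr.k <- dr[[k]]        # the "cve.k" object for this dimension

str(dr.k$B)            # estimated basis of the SDR subspace (null space of V.best)
```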
@@ -134,41 +165,42 @@ cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5, k, ...) {
 #' @export
 plot.cve <- function(x, ...) {
-    H <- x$history
-    H_1 <- H[!is.na(H[, 1]), 1]
-
-    defaults <- list(
-        main = "History",
-        xlab = "Iterations i",
-        ylab = expression(loss == L[n](V^{(i)})),
-        xlim = c(1, nrow(H)),
-        ylim = c(0, max(H)),
-        type = "l"
-    )
-
-    call.plot <- match.call()
-    keys <- names(defaults)
-    keys <- keys[match(keys, names(call.plot)[-1], nomatch = 0) == 0]
-
-    for (key in keys) {
-        call.plot[[key]] <- defaults[[key]]
-    }
-
-    call.plot[[1L]] <- quote(plot)
-    call.plot$x <- quote(1:length(H_1))
-    call.plot$y <- quote(H_1)
-
-    eval(call.plot)
-
-    if (ncol(H) > 1) {
-        for (i in 2:ncol(H)) {
-            H_i <- H[H[, i] > 0, i]
-            lines(1:length(H_i), H_i)
-        }
-    }
-    x.ends <- apply(H, 2, function(h) { length(h[!is.na(h)]) })
-    y.ends <- apply(H, 2, function(h) { tail(h[!is.na(h)], n=1) })
-    points(x.ends, y.ends)
+    # (the entire previous body is kept, commented out line by line)
 }
 
 #' Prints a summary of a \code{cve} result.
@@ -13,18 +13,14 @@ cve_linesearch <- function(X, Y, k,
                            slack = 0,
                            epochs = 50L,
                            attempts = 10L,
-                           max.linesearch.iter = 10L
+                           max.linesearch.iter = 10L,
+                           logger = NULL
                     ) {
     # Set `grad` functions environment to enable if to find this environments
     # local variabels, needed to enable the manipulation of this local variables
     # from within `grad`.
     environment(grad) <- environment()
 
-    # Setup histories.
-    loss.history <- matrix(NA, epochs, attempts)
-    error.history <- matrix(NA, epochs, attempts)
-    tau.history <- matrix(NA, epochs, attempts)
-
     # Get dimensions.
     n <- nrow(X)
     p <- ncol(X)
@@ -44,8 +40,8 @@ cve_linesearch <- function(X, Y, k,
     # Compute lookup indexes for symmetrie, lower/upper
     # triangular parts and vectorization.
     pair.index <- elem.pairs(seq(n))
-    i <- pair.index[, 1] # `i` indices of `(i, j)` pairs
-    j <- pair.index[, 2] # `j` indices of `(i, j)` pairs
+    i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
+    j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
     # Matrix of vectorized indices. (vec(index) -> seq)
     index <- matrix(seq(n * n), n, n)
     lower <- index[lower.tri(index)]
@@ -73,6 +69,13 @@ cve_linesearch <- function(X, Y, k,
         # Set last loss (aka, loss after applying the step).
         loss.last <- loss
 
+        # Call logger with initial values before starting optimization.
+        if (is.function(logger)) {
+            epoch <- 0 # Set epoch count to 0 (only relevant for logging).
+            error <- NA
+            logger(environment())
+        }
+
         ## Start optimization loop.
         for (epoch in 1:epochs) {
 
@@ -124,16 +127,16 @@ cve_linesearch <- function(X, Y, k,
             # Compute error.
             error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
 
-            # Write history.
-            loss.history[epoch, attempt] <- loss
-            error.history[epoch, attempt] <- error
-            tau.history[epoch, attempt] <- tau
-
             # Check break condition (epoch check to skip ignored gradient calc).
             # Note: the devision by `sqrt(2 * k)` is included in `tol`.
             if (error < tol | epoch >= epochs) {
                 # take last step and stop optimization.
                 V <- V.tau
+                # Final call to the logger before stopping optimization
+                if (is.function(logger)) {
+                    G <- G.tau
+                    logger(environment())
+                }
                 break()
             }
 
@@ -141,6 +144,12 @@ cve_linesearch <- function(X, Y, k,
             V <- V.tau
             loss.last <- loss
             G <- G.tau
 
+            # Log after taking current step.
+            if (is.function(logger)) {
+                logger(environment())
+            }
+
         }
 
         # Check if current attempt improved previous ones
@@ -152,9 +161,6 @@ cve_linesearch <- function(X, Y, k,
     }
 
     return(list(
-        loss.history = loss.history,
-        error.history = error.history,
-        tau.history = tau.history,
         loss = loss.best,
         V = V.best,
         B = null(V.best),
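All three method implementations now expose the same `logger` hook: the function is handed the optimizer's own environment once before the first epoch (with `epoch == 0` and `error == NA`) and again after every accepted step, so locals such as `loss`, `error`, `tau`, `epoch` and `attempt` can be recorded from outside. A minimal sketch that only tracks the loss (a fuller example is added by this commit to notes.md and the `logging` script further below):

```R
epochs   <- 50L
attempts <- 10L
loss.history <- matrix(NA, epochs + 1, attempts)

logger <- function(env) {
    # Note the `<<-` assignment: it writes into the matrix defined outside.
    loss.history[env$epoch + 1, env$attempt] <<- env$loss
}

# The hook is passed through `cve()` / `cve.call()` to the method, e.g.:
# dr <- cve.call(X, Y, method = "linesearch", k = 2, logger = logger)
```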
@@ -10,17 +10,14 @@ cve_sgd <- function(X, Y, k,
                     tol = 1e-3,
                     epochs = 50L,
                     batch.size = 16L,
-                    attempts = 10L
+                    attempts = 10L,
+                    logger = NULL
                     ) {
     # Set `grad` functions environment to enable if to find this environments
     # local variabels, needed to enable the manipulation of this local variables
     # from within `grad`.
     environment(grad) <- environment()
 
-    # Setup histories.
-    loss.history <- matrix(NA, epochs, attempts)
-    error.history <- matrix(NA, epochs, attempts)
-
     # Get dimensions.
     n <- nrow(X) # Number of samples.
     p <- ncol(X) # Data dimensions
@@ -32,7 +29,7 @@ cve_sgd <- function(X, Y, k,
     tol <- sqrt(2 * q) * tol
 
     # Estaimate bandwidth if not given.
-    if (missing(h) | !is.numeric(h)) {
+    if (missing(h) || !is.numeric(h)) {
         h <- estimate.bandwidth(X, k, nObs)
     }
 
@@ -40,12 +37,11 @@ cve_sgd <- function(X, Y, k,
     # Compute lookup indexes for symmetrie, lower/upper
     # triangular parts and vectorization.
     pair.index <- elem.pairs(seq(n))
-    i <- pair.index[, 1] # `i` indices of `(i, j)` pairs
-    j <- pair.index[, 2] # `j` indices of `(i, j)` pairs
-    # Matrix of vectorized indices. (vec(index) -> seq)
-    index <- matrix(seq(n * n), n, n)
-    lower <- index[lower.tri(index)]
-    upper <- t(index)[lower]
+    i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
+    j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
+    # Index of vectorized matrix, for lower and upper triangular part.
+    lower <- ((i - 1) * n) + j
+    upper <- ((j - 1) * n) + i
 
     # Create all pairewise differences of rows of `X`.
     X_diff <- X[i, , drop = F] - X[j, , drop = F]
@@ -69,17 +65,23 @@ cve_sgd <- function(X, Y, k,
         # Keep track of last `V` for computing error after an epoch.
         V.last <- V
 
+        if (is.function(logger)) {
+            loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
+            error <- NA
+            epoch <- 0
+            logger(environment())
+        }
+
         # Repeat `epochs` times
         for (epoch in 1:epochs) {
             # Shuffle batches
             batch.shuffle <- sample(indices)
 
             # Make a step for each batch.
-            for (start in seq(1, n, batch.size)) {
+            for (batch.start in seq(1, n, batch.size)) {
                 # Select batch data indices.
-                batch <- batch.shuffle[start:(start + batch.size - 1)]
-                # Remove `NA`'s (when `n` isn't a multiple of `batch.size`).
-                batch <- batch[!is.na(batch)]
+                batch.end <- min(batch.start + batch.size - 1, length(batch.shuffle))
+                batch <- batch.shuffle[batch.start:batch.end]
 
                 # Compute batch gradient.
                 loss <- NULL
@@ -93,21 +95,24 @@ cve_sgd <- function(X, Y, k,
                 # Parallet transport (on Stiefl manifold) into direction of `G`.
                 V <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)
             }
-            # Compute actuall loss after finishing optimization.
-            loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
             # And the error for the history.
             error <- norm(V.last %*% t(V.last) - V %*% t(V), type = "F")
             V.last <- V
 
-            # Finaly write history.
-            loss.history[epoch, attempt] <- loss
-            error.history[epoch, attempt] <- error
+            if (is.function(logger)) {
+                # Compute loss at end of epoch for logging.
+                loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
+                logger(environment())
+            }
 
             # Check break condition.
             if (error < tol) {
                 break()
             }
         }
+        # Compute actual loss after finishing for comparing multiple attempts.
+        loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
+
         # After each attempt, check if last attempt reached a better result.
         if (loss < loss.best) {
             loss.best <- loss
@@ -116,8 +121,6 @@ cve_sgd <- function(X, Y, k,
     }
 
     return(list(
-        loss.history = loss.history,
-        error.history = error.history,
        loss = loss.best,
        V = V.best,
        B = null(V.best),
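The rewritten batch loop clips the final batch instead of padding it with `NA`s and filtering them out afterwards. The index arithmetic in isolation, for an `n` that is not a multiple of `batch.size` (numbers chosen only for the illustration):

```R
n <- 37L
batch.size <- 16L
batch.shuffle <- sample(n)

for (batch.start in seq(1, n, batch.size)) {
    batch.end <- min(batch.start + batch.size - 1, length(batch.shuffle))
    batch <- batch.shuffle[batch.start:batch.end]
    cat("batch", batch.start, "-", batch.end, ": size", length(batch), "\n")
}
# batch 1 - 16 : size 16
# batch 17 - 32 : size 16
# batch 33 - 37 : size 5
```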
@@ -10,21 +10,18 @@ cve_simple <- function(X, Y, k,
                        tol = 1e-3,
                        slack = 0,
                        epochs = 50L,
-                       attempts = 10L
+                       attempts = 10L,
+                       logger = NULL
                     ) {
     # Set `grad` functions environment to enable if to find this environments
     # local variabels, needed to enable the manipulation of this local variables
     # from within `grad`.
     environment(grad) <- environment()
 
-    # Setup histories.
-    loss.history <- matrix(NA, epochs, attempts)
-    error.history <- matrix(NA, epochs, attempts)
-
     # Get dimensions.
-    n <- nrow(X)
-    p <- ncol(X)
-    q <- p - k
+    n <- nrow(X) # Number of samples.
+    p <- ncol(X) # Data dimensions
+    q <- p - k # Complement dimension of the SDR space.
 
     # Save initial learning rate `tau`.
     tau.init <- tau
@@ -32,7 +29,7 @@ cve_simple <- function(X, Y, k,
     tol <- sqrt(2 * q) * tol
 
     # Estaimate bandwidth if not given.
-    if (missing(h) | !is.numeric(h)) {
+    if (missing(h) || !is.numeric(h)) {
         h <- estimate.bandwidth(X, k, nObs)
     }
 
@@ -40,12 +37,11 @@ cve_simple <- function(X, Y, k,
     # Compute lookup indexes for symmetrie, lower/upper
     # triangular parts and vectorization.
     pair.index <- elem.pairs(seq(n))
-    i <- pair.index[, 1] # `i` indices of `(i, j)` pairs
-    j <- pair.index[, 2] # `j` indices of `(i, j)` pairs
-    # Matrix of vectorized indices. (vec(index) -> seq)
-    index <- matrix(seq(n * n), n, n)
-    lower <- index[lower.tri(index)]
-    upper <- t(index)[lower]
+    i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
+    j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
+    # Index of vectorized matrix, for lower and upper triangular part.
+    lower <- ((i - 1) * n) + j
+    upper <- ((j - 1) * n) + i
 
     # Create all pairewise differences of rows of `X`.
     X_diff <- X[i, , drop = F] - X[j, , drop = F]
@@ -58,8 +54,7 @@ cve_simple <- function(X, Y, k,
 
     # Start loop for multiple attempts.
     for (attempt in 1:attempts) {
-        # reset step width `tau`
+        # Reset learning rate `tau`.
         tau <- tau.init
 
         # Sample a `(p, q)` dimensional matrix from the stiefel manifold as
@@ -75,6 +70,13 @@ cve_simple <- function(X, Y, k,
         # Cayley transform matrix `A`
         A <- (G %*% t(V)) - (V %*% t(G))
 
+        # Call logger with initial values before starting optimization.
+        if (is.function(logger)) {
+            epoch <- 0 # Set epoch count to 0 (only relevant for logging).
+            error <- NA
+            logger(environment())
+        }
+
         ## Start optimization loop.
         for (epoch in 1:epochs) {
             # Apply learning rate `tau`.
@@ -85,7 +87,7 @@ cve_simple <- function(X, Y, k,
             # Loss at position after a step.
             loss <- grad(X, Y, V.tau, h, loss.only = TRUE, persistent = TRUE)
 
-            # Check if step is appropriate
+            # Check if step is appropriate, iff not reduce learning rate.
             if ((loss - loss.last) > slack * loss.last) {
                 tau <- tau / 2
                 next() # Keep position and try with smaller `tau`.
@@ -94,15 +96,15 @@ cve_simple <- function(X, Y, k,
             # Compute error.
             error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
 
-            # Write history.
-            loss.history[epoch, attempt] <- loss
-            error.history[epoch, attempt] <- error
-
             # Check break condition (epoch check to skip ignored gradient calc).
             # Note: the devision by `sqrt(2 * k)` is included in `tol`.
-            if (error < tol | epoch >= epochs) {
+            if (error < tol || epoch >= epochs) {
                 # take last step and stop optimization.
                 V <- V.tau
+                # Call logger last time befor stoping.
+                if (is.function(logger)) {
+                    logger(environment())
+                }
                 break()
             }
 
@@ -110,6 +112,11 @@ cve_simple <- function(X, Y, k,
             V <- V.tau
             loss.last <- loss
 
+            # Call logger after taking a step.
+            if (is.function(logger)) {
+                logger(environment())
+            }
+
             # Compute gradient at new position.
             G <- grad(X, Y, V, h, persistent = TRUE)
 
@@ -126,8 +133,6 @@ cve_simple <- function(X, Y, k,
     }
 
     return(list(
-        loss.history = loss.history,
-        error.history = error.history,
        loss = loss.best,
        V = V.best,
        B = null(V.best),
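Both `cve_simple` and `cve_sgd` step along the Stiefel manifold by applying a Cayley transform of the skew-symmetric matrix `A = G %*% t(V) - V %*% t(G)`, which is what keeps the columns of `V` orthonormal after every update. A standalone numerical check of that property (random `V` and `G`; the exact scaling used to form `A.tau` inside the methods is not shown in these hunks, so `0.1 * A` below is only an assumption):

```R
set.seed(1)
p <- 6; q <- 2
V <- qr.Q(qr(matrix(rnorm(p * q), p, q)))  # orthonormal columns: t(V) %*% V == I
G <- matrix(rnorm(p * q), p, q)            # stand-in for the gradient

A     <- (G %*% t(V)) - (V %*% t(G))       # skew-symmetric by construction
I_p   <- diag(p)
A.tau <- 0.1 * A                           # assumed step scaling

V.tau <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)

max(abs(t(V.tau) %*% V.tau - diag(q)))     # ~1e-16: still on the Stiefel manifold
```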
@@ -24,12 +24,11 @@ grad <- function(X, Y, V, h,
         # Compute lookup indexes for symmetrie, lower/upper
         # triangular parts and vectorization.
         pair.index <- elem.pairs(seq(n))
-        i <- pair.index[, 1] # `i` indices of `(i, j)` pairs
-        j <- pair.index[, 2] # `j` indices of `(i, j)` pairs
-        # Matrix of vectorized indices. (vec(index) -> seq)
-        index <- matrix(seq(n * n), n, n)
-        lower <- index[lower.tri(index)]
-        upper <- t.default(index)[lower]
+        i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
+        j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
+        # Index of vectorized matrix, for lower and upper triangular part.
+        lower <- ((i - 1) * n) + j
+        upper <- ((j - 1) * n) + i
 
         # Create all pairewise differences of rows of `X`.
         X_diff <- X[i, , drop = F] - X[j, , drop = F]
@@ -39,7 +38,7 @@ grad <- function(X, Y, V, h,
     Q <- diag(1, p) - tcrossprod(V, V)
 
     # Vectorized distance matrix `D`.
-    vecD <- rowSums((X_diff %*% Q)^2)
+    vecD <- colSums(tcrossprod(Q, X_diff)^2)
 
     # Weight matrix `W` (dnorm ... gaussean density function)
     W <- matrix(1, n, n) # `exp(0) == 1`
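The new `vecD` expression computes the same vector as before: `tcrossprod(Q, X_diff)` is `Q %*% t(X_diff)`, and since `Q` is symmetric its squared column sums coincide with the squared row sums of `X_diff %*% Q`. A quick check outside the package:

```R
set.seed(42)
n <- 10; p <- 4; q <- 2
X_diff <- matrix(rnorm(n * p), n, p)
V <- qr.Q(qr(matrix(rnorm(p * q), p, q)))
Q <- diag(1, p) - tcrossprod(V, V)          # symmetric projection, as in grad()

all.equal(rowSums((X_diff %*% Q)^2),        # old formulation
          colSums(tcrossprod(Q, X_diff)^2)) # new formulation -> TRUE
```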
@@ -22,42 +22,19 @@ null <- function(V) {
     return(qr.Q(tmp, complete=TRUE)[, set, drop=FALSE])
 }
 
-#' Creates a (numeric) matrix where each row contains
+#' Creates a (numeric) matrix where each column contains
 #' an element to element matching.
 #' @param elements numeric vector of elements to match
-#' @return matrix of size `(n * (n - 1) / 2, 2)` for a argument of lenght `n`.
+#' @return matrix of size `(2, n * (n - 1) / 2)` for a argument of lenght `n`.
 #' @keywords internal
+#' @examples
+#' elem.pairs(seq.int(2, 5))
 #' @export
 elem.pairs <- function(elements) {
     # Number of elements to match.
     n <- length(elements)
     # Create all combinations.
-    pairs <- cbind(rep(elements, each=n), rep(elements, n))
+    pairs <- rbind(rep(elements, each=n), rep(elements, n))
     # Select unique combinations without self interaction.
-    return(pairs[pairs[, 1] < pairs[, 2], ])
+    return(pairs[, pairs[1, ] < pairs[2, ]])
 }
-
-#' Apply function to pairs of matrix rows or columns.
-#'
-#' \code{row.pair.apply} applies \code{FUN} to each unique row pair without self
-#' interaction while \code{col.pair.apply} does the same for columns.
-#' @param X Matrix.
-#' @param FUN Function to apply to each pair.
-#' @examples
-#' X <- matrix(seq(12), 4, 3)
-#' # matrix containing all row to row differences.
-#' row.pair.apply(X, `-`)
-#' @aliases row.pair.apply, col.pair.apply
-#' @keywords internal
-#' @export
-row.pair.apply <- function(X, FUN) {
-    pairs <- elem.pairs(seq(nrow(X)))
-    return(FUN(X[pairs[, 1], ], X[pairs[, 2], ]))
-}
-#' @rdname row.pair.apply
-#' @keywords internal
-#' @export
-col.pair.apply <- function(X, FUN) {
-    pairs <- elem.pairs(seq(ncol(X)))
-    return(FUN(X[, pairs[, 1]], X[, pairs[, 2]]))
-}
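With the transposed layout, `pair.index[1, ]` and `pair.index[2, ]` give the `i < j` pairs directly, and the linear indices `((i - 1) * n) + j` and `((j - 1) * n) + i` used in the optimizer files address exactly the lower and upper triangular entries of an `n x n` matrix in column-major order, i.e. the same values the previous `lower.tri`-based lookup produced. A consistency check using the new implementation:

```R
elem.pairs <- function(elements) {   # copy of the new implementation above
    n <- length(elements)
    pairs <- rbind(rep(elements, each = n), rep(elements, n))
    pairs[, pairs[1, ] < pairs[2, ]]
}

n <- 5
pair.index <- elem.pairs(seq(n))
i <- pair.index[1, ]
j <- pair.index[2, ]

index <- matrix(seq(n * n), n, n)
all(((i - 1) * n) + j == index[lower.tri(index)])     # TRUE
all(((j - 1) * n) + i == t(index)[lower.tri(index)])  # TRUE
```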
@@ -0,0 +1,2 @@ (new file)
runtime_test    Runtime comparison of CVE against MAVE for M1 - M5 datasets.
logging         Example of a logger function for cve algorithm analysis.
@@ -0,0 +1,43 @@ (new file: the `logging` example script)
Apart from its first line, `library(CVEpureR)`, and the comment "# Performe SDR for ONE `k`.", this script is line for line the logger example that this commit also adds to notes.md below: the `epochs + 1` by `attempts` history matrices for loss, error, tau and true error, the `dataset("M1")` setup, a `logger(env)` that records `env$loss`, `env$error`, `env$tau` and the true projection error via `<<-`, the call `cve(Y ~ X, k = k, logger = logger, epochs = epochs, attempts = attempts)`, and the four `matplot` panels.
@@ -0,0 +1,89 @@ (new file)
The same rewritten runtime test as reconstructed in the first two hunks above, including the colMeans/boxplot summary section, except that it loads `CVEpureR` instead of `CVE`.
@@ -0,0 +1,20 @@ (new file)
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/CVE.R
\docType{package}
\name{CVEpureR-package}
\alias{CVEpureR}
\alias{CVEpureR-package}
\title{Conditional Variance Estimator (CVE)}
\description{
Conditional Variance Estimator for Sufficient Dimension
Reduction
}
\details{
TODO: And some details
}
\references{
Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
}
\author{
Loki
}
@@ -5,9 +5,10 @@
 \alias{cve.call}
 \title{Implementation of the CVE method.}
 \usage{
-cve(formula, data, method = "simple", ...)
+cve(formula, data, method = "simple", max.dim = 10, ...)
 
-cve.call(X, Y, method = "simple", nObs = nrow(X)^0.5, k, ...)
+cve.call(X, Y, method = "simple", nObs = nrow(X)^0.5, min.dim = 1,
+  max.dim = 10, k, ...)
 }
 \arguments{
 \item{formula}{Formel for the regression model defining `X`, `Y`.
@@ -7,7 +7,7 @@ conditions.}
 \usage{
 cve_linesearch(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 1,
   tol = 0.001, rho1 = 0.1, rho2 = 0.9, slack = 0, epochs = 50L,
-  attempts = 10L, max.linesearch.iter = 10L)
+  attempts = 10L, max.linesearch.iter = 10L, logger = NULL)
 }
 \description{
 Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe
@@ -6,7 +6,8 @@
 a classic GD method unsing no further tricks.}
 \usage{
 cve_sgd(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 0.01,
-  tol = 0.001, epochs = 50L, batch.size = 16L, attempts = 10L)
+  tol = 0.001, epochs = 50L, batch.size = 16L, attempts = 10L,
+  logger = NULL)
 }
 \description{
 Simple implementation of the CVE method. 'Simple' means that this method is
@@ -6,7 +6,8 @@
 a classic GD method unsing no further tricks.}
 \usage{
 cve_simple(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 1,
-  tol = 0.001, slack = 0, epochs = 50L, attempts = 10L)
+  tol = 0.001, slack = 0, epochs = 50L, attempts = 10L,
+  logger = NULL)
 }
 \description{
 Simple implementation of the CVE method. 'Simple' means that this method is
@@ -2,7 +2,7 @@
 % Please edit documentation in R/util.R
 \name{elem.pairs}
 \alias{elem.pairs}
-\title{Creates a (numeric) matrix where each row contains
+\title{Creates a (numeric) matrix where each column contains
 an element to element matching.}
 \usage{
 elem.pairs(elements)
@@ -11,10 +11,13 @@ elem.pairs(elements)
 \item{elements}{numeric vector of elements to match}
 }
 \value{
-matrix of size `(n * (n - 1) / 2, 2)` for a argument of lenght `n`.
+matrix of size `(2, n * (n - 1) / 2)` for a argument of lenght `n`.
 }
 \description{
-Creates a (numeric) matrix where each row contains
+Creates a (numeric) matrix where each column contains
 an element to element matching.
 }
+\examples{
+elem.pairs(seq.int(2, 5))
+}
 \keyword{internal}
@@ -1,27 +0,0 @@ (file deleted)
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/util.R
-\name{row.pair.apply}
-\alias{row.pair.apply}
-\alias{row.pair.apply,}
-\alias{col.pair.apply}
-\title{Apply function to pairs of matrix rows or columns.}
-\usage{
-row.pair.apply(X, FUN)
-
-col.pair.apply(X, FUN)
-}
-\arguments{
-\item{X}{Matrix.}
-
-\item{FUN}{Function to apply to each pair.}
-}
-\description{
-\code{row.pair.apply} applies \code{FUN} to each unique row pair without self
-interaction while \code{col.pair.apply} does the same for columns.
-}
-\examples{
-X <- matrix(seq(12), 4, 3)
-# matrix containing all row to row differences.
-row.pair.apply(X, `-`)
-}
-\keyword{internal}
notes.md | 109
@@ -1,3 +1,12 @@
+# General Notes for Souce Code analysis
+## Search in multiple files.
+Using the Linux `grep` program with the parameters `-rnw` and specifying a include files filter like the following example.
+```bash
+grep --include=*\.{c,h,R} -rnw '.' -e "sweep"
+```
+searches in all `C` source and header fils as well as `R` source files for the term _sweep_.
+
+# Development
 ## Build and install.
 To build the package the `devtools` package is used. This also provides `roxygen2` which is used for documentation and authomatic creaton of the `NAMESPACE` file.
 ```R
@@ -29,6 +38,55 @@ install.packages(path, repos = NULL, type = "source")
 ```
 **Note: I only recommend this approach during development.**
 
+# Analysing
+## Logging (a `cve` run).
+To log `loss`, `error` (estimated) the true error (error of current estimated `B` against the true `B`) or even the stepsize one can use the `logger` parameter. A `logger` is a function that gets the current `environment` of the CVE optimization methods (__do not alter this environment, only read from it__). This can be used to create logs like in the following example.
+```R
+library(CVE)
+
+# Setup histories.
+(epochs <- 50)
+(attempts <- 10)
+loss.history <- matrix(NA, epochs + 1, attempts)
+error.history <- matrix(NA, epochs + 1, attempts)
+tau.history <- matrix(NA, epochs + 1, attempts)
+true.error.history <- matrix(NA, epochs + 1, attempts)
+
+# Create a dataset
+ds <- dataset("M1")
+X <- ds$X
+Y <- ds$Y
+B <- ds$B # the true `B`
+(k <- ncol(ds$B))
+
+# True projection matrix.
+P <- B %*% solve(t(B) %*% B) %*% t(B)
+# Define the logger for the `cve()` method.
+logger <- function(env) {
+    # Note the `<<-` assignement!
+    loss.history[env$epoch + 1, env$attempt] <<- env$loss
+    error.history[env$epoch + 1, env$attempt] <<- env$error
+    tau.history[env$epoch + 1, env$attempt] <<- env$tau
+    # Compute true error by comparing to the true `B`
+    B.est <- null(env$V) # Function provided by CVE
+    P.est <- B.est %*% solve(t(B.est) %*% B.est) %*% t(B.est)
+    true.error <- norm(P - P.est, 'F') / sqrt(2 * k)
+    true.error.history[env$epoch + 1, env$attempt] <<- true.error
+}
+# Performa SDR
+dr <- cve(Y ~ X, k = k, logger = logger, epochs = epochs, attempts = attempts)
+# Plot history's
+par(mfrow = c(2, 2))
+matplot(loss.history, type = 'l', log = 'y', xlab = 'iter',
+        main = 'loss', ylab = expression(L(V[iter])))
+matplot(error.history, type = 'l', log = 'y', xlab = 'iter',
+        main = 'error', ylab = 'error')
+matplot(tau.history, type = 'l', log = 'y', xlab = 'iter',
+        main = 'tau', ylab = 'tau')
+matplot(true.error.history, type = 'l', log = 'y', xlab = 'iter',
+        main = 'true error', ylab = 'true error')
+```
+
 ## Reading log files.
 The runtime tests (upcomming further tests) are creating log files saved in `tmp/`. These log files are `CSV` files (actualy `TSV`) with a header storing the test results. Depending on the test the files may contain differnt data. As an example we use the runtime test logs which store in each line the `dataset`, the used `method` as well as the `error` (actual error of estimated `B` against real `B`) and the `time`. For reading and analysing the data see the following example.
 ```R
@@ -66,7 +124,9 @@ jedi.seeks <- function() {
 }
 
 trooper.seeks()
+# [1] "These aren't the droids you're looking for."
 jedi.seeks()
+# [1] "R2-D2", "C-3PO"
 ```
 
 The next example ilustrates how to write (without local copies) to variables outside the functions local environment.
@@ -127,27 +187,25 @@ library(microbenchmark)
 
 A <- matrix(runif(120), 12, 10)
 
-# Matrix trace.
-tr <- function(M) sum(diag(M))
-
 # Check correctnes and benckmark performance.
 stopifnot(
     all.equal(
-        tr(t(A) %*% A),
-        sum(diag(t(A) %*% A)),
-        sum(A * A)
+        sum(diag(t(A) %*% A)), sum(diag(crossprod(A, A)))
+    ),
+    all.equal(
+        sum(diag(t(A) %*% A)), sum(A * A)
     )
 )
 microbenchmark(
-    tr(t(A) %*% A),
-    sum(diag(t(A) %*% A)),
-    sum(A * A)
+    MM = sum(diag(t(A) %*% A)),
+    cross = sum(diag(crossprod(A, A))),
+    elem = sum(A * A)
 )
 # Unit: nanoseconds
-#                   expr  min   lq    mean median     uq   max neval
-#         tr(t(A) %*% A) 4335 4713 5076.36 4949.5 5402.5  7928   100
-#  sum(diag(t(A) %*% A)) 4106 4429 5233.89 4733.5 5057.5 49308   100
-#             sum(A * A)  540  681  777.07  740.0  818.5  3572   100
+#   expr  min     lq    mean median     uq   max neval
+#     MM 4232 4570.0 5138.81   4737 4956.0 40308   100
+#  cross 2523 2774.5 2974.93   2946 3114.5  5078   100
+#   elem  582  762.5  973.02    834  964.0 12945   100
 ```
 
 ```R
@@ -243,6 +301,31 @@ microbenchmark(
 #  outer 1141.479 1216.929 1404.702 1317.7315 1582.800 2531.766   100
 ```
 
+### Fast dist matrix computation (aka. row sum of squares).
+```R
+library(microbenchmark)
+library(CVE)
+
+(n <- 200)
+(N <- n * (n - 1) / 2)
+(p <- 12)
+M <- matrix(runif(N * p), N, p)
+
+stopifnot(
+    all.equal(rowSums(M^2), rowSums.c(M^2)),
+    all.equal(rowSums(M^2), rowSquareSums.c(M))
+)
+microbenchmark(
+    sums = rowSums(M^2),
+    sums.c = rowSums.c(M^2),
+    sqSums.c = rowSquareSums.c(M)
+)
+# Unit: microseconds
+#      expr     min       lq      mean    median       uq      max neval
+#      sums 666.311 1051.036 1612.3100 1139.0065 1547.657 13940.97   100
+#    sums.c 342.647  672.453 1009.9109  740.6255 1224.715 13765.90   100
+#  sqSums.c 115.325  142.128  175.6242  153.4645  169.678   759.87   100
+```
+
 ## Using `Rprof()` for performance.
 The standart method for profiling where an algorithm is spending its time is with `Rprof()`.
@@ -14,7 +14,7 @@ subspace.dist <- function(B1, B2){
 }
 
 # Number of simulations
-SIM.NR <- 50
+SIM.NR <- 20
 # maximal number of iterations in curvilinear search algorithm
 MAXIT <- 50
 # number of arbitrary starting values for curvilinear optimization
@@ -22,7 +22,7 @@ ATTEMPTS <- 10
 # set names of datasets
 dataset.names <- c("M1", "M2", "M3", "M4", "M5")
 # Set used CVE method
-methods <- c("simple") #, "sgd") # "legacy"
+methods <- c("simple") # c("legacy", "simple", "sgd", "linesearch")
 
 library(CVE) # load CVE
 if ("legacy" %in% methods) {
@@ -83,6 +83,7 @@ for (sim in 1:SIM.NR) {
                 attempts = ATTEMPTS
             )
         )
+        dr <- dr[[truedim]]
     }
 
     key <- paste0(name, '-', method)
@ -0,0 +1,329 @@
|
||||||
|
library(microbenchmark)
|
||||||
|
|
||||||
|
dyn.load("wip.so")
|

## rowSum* .call --------------------------------------------------------------
rowSums.c <- function(M) {
    stopifnot(
        is.matrix(M),
        is.numeric(M)
    )
    if (!is.double(M)) {
        M <- matrix(as.double(M), nrow = nrow(M))
    }

    .Call('R_rowSums', PACKAGE = 'wip', M)
}
colSums.c <- function(M) {
    stopifnot(
        is.matrix(M),
        is.numeric(M)
    )
    if (!is.double(M)) {
        M <- matrix(as.double(M), nrow = nrow(M))
    }

    .Call('R_colSums', PACKAGE = 'wip', M)
}
rowSquareSums.c <- function(M) {
    stopifnot(
        is.matrix(M),
        is.numeric(M)
    )
    if (!is.double(M)) {
        M <- matrix(as.double(M), nrow = nrow(M))
    }

    .Call('R_rowSquareSums', PACKAGE = 'wip', M)
}
rowSumsSymVec.c <- function(vecA, nrow, diag = 0.0) {
    stopifnot(
        is.vector(vecA),
        is.numeric(vecA),
        is.numeric(diag),
        nrow * (nrow - 1) == length(vecA) * 2
    )
    if (!is.double(vecA)) {
        vecA <- as.double(vecA)
    }
    .Call('R_rowSumsSymVec', PACKAGE = 'wip',
          vecA, as.integer(nrow), as.double(diag))
}
rowSweep.c <- function(A, v, op = '-') {
    stopifnot(
        is.matrix(A),
        is.numeric(v)
    )
    if (!is.double(A)) {
        A <- matrix(as.double(A), nrow = nrow(A))
    }
    if (!is.vector(v) || !is.double(v)) {
        v <- as.double(v)
    }
    stopifnot(
        nrow(A) == length(v),
        op %in% c('+', '-', '*', '/')
    )

    .Call('R_rowSweep', PACKAGE = 'wip', A, v, op)
}

## row*, col* tests ------------------------------------------------------------
n <- 3000
M <- matrix(runif(n * 12), n, 12)
stopifnot(
    all.equal(rowSums(M^2), rowSums.c(M^2)),
    all.equal(colSums(M), colSums.c(M))
)
microbenchmark(
    rowSums = rowSums(M^2),
    rowSums.c = rowSums.c(M^2),
    rowSqSums.c = rowSquareSums.c(M)
)
microbenchmark(
    colSums = colSums(M),
    colSums.c = colSums.c(M)
)

sum <- rowSums(M)
stopifnot(all.equal(
    sweep(M, 1, sum, FUN = `/`),
    rowSweep.c(M, sum, '/') # Row-normalize
), all.equal(
    sweep(M, 1, sum, FUN = `/`),
    M / sum
))
microbenchmark(
    sweep = sweep(M, 1, sum, FUN = `/`),
    M / sum,
    rowSweep.c = rowSweep.c(M, sum, '/') # Row-normalize
)

# Create symmetric matrix with constant diagonal entries.
nrow <- 200
diag <- 1.0
Sym <- tcrossprod(runif(nrow))
diag(Sym) <- diag
# Get vectorized lower triangular part of `Sym` matrix.
SymVec <- Sym[lower.tri(Sym)]
stopifnot(all.equal(
    rowSums(Sym),
    rowSumsSymVec.c(SymVec, nrow, diag)
))
microbenchmark(
    rowSums = rowSums(Sym),
    rowSums.c = rowSums.c(Sym),
    rowSumsSymVec.c = rowSumsSymVec.c(SymVec, nrow, diag)
)

## Matrix-Matrix operation .call ----------------------------------------------
matrixprod.c <- function(A, B) {
    stopifnot(
        is.matrix(A), is.numeric(A),
        is.matrix(B), is.numeric(B),
        ncol(A) == nrow(B)
    )
    if (!is.double(A)) {
        A <- matrix(as.double(A), nrow = nrow(A))
    }
    if (!is.double(B)) {
        B <- matrix(as.double(B), nrow = nrow(B))
    }

    .Call('R_matrixprod', PACKAGE = 'wip', A, B)
}
crossprod.c <- function(A, B) {
    stopifnot(
        is.matrix(A), is.numeric(A),
        is.matrix(B), is.numeric(B),
        nrow(A) == nrow(B)
    )
    if (!is.double(A)) {
        A <- matrix(as.double(A), nrow = nrow(A))
    }
    if (!is.double(B)) {
        B <- matrix(as.double(B), nrow = nrow(B))
    }

    .Call('R_crossprod', PACKAGE = 'wip', A, B)
}
skewSymRank2k.c <- function(A, B, alpha = 1, beta = 0) {
    stopifnot(
        is.matrix(A), is.numeric(A),
        is.matrix(B), is.numeric(B),
        all(dim(A) == dim(B)),
        is.numeric(alpha), length(alpha) == 1L,
        is.numeric(beta), length(beta) == 1L
    )
    if (!is.double(A)) {
        A <- matrix(as.double(A), nrow = nrow(A))
    }
    if (!is.double(B)) {
        B <- matrix(as.double(B), nrow = nrow(B))
    }

    .Call('R_skewSymRank2k', PACKAGE = 'wip', A, B,
          as.double(alpha), as.double(beta))
}

## Matrix-Matrix operation tests ------------------------------------------------
n <- 200
k <- 100
m <- 300

A <- matrix(runif(n * k), n, k)
B <- matrix(runif(k * m), k, m)
stopifnot(
    all.equal(A %*% B, matrixprod.c(A, B))
)
microbenchmark(
    "%*%" = A %*% B,
    matrixprod.c = matrixprod.c(A, B)
)

A <- matrix(runif(k * n), k, n)
B <- matrix(runif(k * m), k, m)
stopifnot(
    all.equal(crossprod(A, B), crossprod.c(A, B))
)
microbenchmark(
    crossprod = crossprod(A, B),
    crossprod.c = crossprod.c(A, B)
)

n <- 12
k <- 11
A <- matrix(runif(n * k), n, k)
B <- matrix(runif(n * k), n, k)
stopifnot(all.equal(
    A %*% t(B) - B %*% t(A), skewSymRank2k.c(A, B)
))
microbenchmark(
    A %*% t(B) - B %*% t(A),
    skewSymRank2k.c(A, B)
)

## Orthogonal projection onto null space .Call --------------------------------
nullProj.c <- function(B) {
    stopifnot(
        is.matrix(B), is.numeric(B)
    )
    if (!is.double(B)) {
        B <- matrix(as.double(B), nrow = nrow(B))
    }

    .Call('R_nullProj', PACKAGE = 'wip', B)
}
## Orthogonal projection onto null space tests --------------------------------
p <- 12
q <- 10
V <- qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q)))

# Projection matrix onto the orthogonal complement of `span(V)`
Q <- diag(1, p) - tcrossprod(V, V)
stopifnot(
    all.equal(Q, nullProj.c(V))
)
microbenchmark(
    nullProj = diag(1, p) - tcrossprod(V, V),
    nullProj.c = nullProj.c(V)
)


# ## WIP for gradient. ----------------------------------------------------------

gradient.c <- function(X, X_diff, Y, V, h) {
    stopifnot(
        is.matrix(X), is.double(X),
        is.matrix(X_diff), is.double(X_diff),
        ncol(X_diff) == ncol(X), nrow(X_diff) == nrow(X) * (nrow(X) - 1) / 2,
        is.vector(Y) || (is.matrix(Y) && min(dim(Y)) == 1L), is.double(Y),
        length(Y) == nrow(X),
        is.matrix(V), is.double(V),
        nrow(V) == ncol(X),
        is.vector(h), is.numeric(h), length(h) == 1
    )

    .Call('R_gradient', PACKAGE = 'wip',
          X, X_diff, as.double(Y), V, as.double(h))
}

elem.pairs <- function(elements) {
    # Number of elements to match.
    n <- length(elements)
    # Create all combinations.
    pairs <- rbind(rep(elements, each = n), rep(elements, n))
    # Select unique combinations without self interaction.
    return(pairs[, pairs[1, ] < pairs[2, ]])
}
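# Illustration (added note, not part of the original source): for example,
# elem.pairs(1:4) yields the 2 x 6 matrix of index pairs
#     1 1 1 2 2 3
#     2 3 4 3 4 4
# i.e. row 1 holds the `i` and row 2 the `j` of all pairs with i < j.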
grad <- function(X, Y, V, h, persistent = TRUE) {
    n <- nrow(X)
    p <- ncol(X)

    if (!persistent) {
        pair.index <- elem.pairs(seq(n))
        i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
        j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
        lower <- ((i - 1) * n) + j
        upper <- ((j - 1) * n) + i
        X_diff <- X[i, , drop = F] - X[j, , drop = F]
    }

    # Projection matrix onto the orthogonal complement of `span(V)`
    Q <- diag(1, p) - tcrossprod(V, V)
    # Vectorized distance matrix `D`.
    vecD <- rowSums((X_diff %*% Q)^2)

    # Weight matrix `W` (dnorm ... Gaussian density function)
    W <- matrix(1, n, n)                        # `exp(0) == 1`
    W[lower] <- exp((-0.5 / h) * vecD^2)        # Set lower tri. part
    W[upper] <- t.default(W)[upper]             # Mirror lower tri. to upper
    W <- sweep(W, 2, colSums(W), FUN = `/`)     # Col-Normalize

    # Weighted `Y` moments
    y1 <- Y %*% W   # Result is 1D -> transposition irrelevant
    y2 <- Y^2 %*% W

    # Per example loss `L(V, X_i)`
    L <- y2 - y1^2

    # Vectorized Weights with forced symmetry
    vecS <- (L[i] - (Y[j] - y1[i])^2) * W[lower]
    vecS <- vecS + ((L[j] - (Y[i] - y1[j])^2) * W[upper])
    # Compute scaling of `X` row differences
    vecS <- vecS * vecD

    G <- crossprod(X_diff, X_diff * vecS) %*% V
    G <- (-2 / (n * h^2)) * G
    return(G)
}
rStiefl <- function(p, q) {
    return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))))
}

n <- 200
p <- 12
q <- 10

X <- matrix(runif(n * p), n, p)
Y <- runif(n)
V <- rStiefl(p, q)
h <- 0.1

pair.index <- elem.pairs(seq(n))
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
lower <- ((i - 1) * n) + j
upper <- ((j - 1) * n) + i
X_diff <- X[i, , drop = F] - X[j, , drop = F]

stopifnot(all.equal(
    grad(X, Y, V, h),
    gradient.c(X, X_diff, Y, V, h)
))
microbenchmark(
    grad = grad(X, Y, V, h),
    gradient.c = gradient.c(X, X_diff, Y, V, h)
)

@@ -0,0 +1,421 @@
#include <stdlib.h>
#include <string.h>
#include <math.h>

#include <R_ext/BLAS.h>
#include <R_ext/Lapack.h>
#include <R_ext/Error.h>
// #include <Rmath.h>

#include "wip.h"

static inline void rowSums(const double *A,
                           const int nrow, const int ncol,
                           double *sum) {
    int i, j, block_size, block_size_i;
    const double *A_block = A;
    const double *A_end = A + nrow * ncol;

    if (nrow > CVE_MEM_CHUNK_SIZE) {
        block_size = CVE_MEM_CHUNK_SIZE;
    } else {
        block_size = nrow;
    }

    // Iterate `(block_size_i, ncol)` submatrix blocks.
    for (i = 0; i < nrow; i += block_size_i) {
        // Reset `A` to new block beginning.
        A = A_block;
        // Take block size of everything left and reduce to max size.
        block_size_i = nrow - i;
        if (block_size_i > block_size) {
            block_size_i = block_size;
        }
        // Compute the block's first column,
        for (j = 0; j < block_size_i; ++j) {
            sum[j] = A[j];
        }
        // and sum the following columns onto it.
        for (A += nrow; A < A_end; A += nrow) {
            for (j = 0; j < block_size_i; ++j) {
                sum[j] += A[j];
            }
        }
        // Step one block forth.
        A_block += block_size_i;
        sum += block_size_i;
    }
}

static inline void colSums(const double *A,
                           const int nrow, const int ncol,
                           double *sum) {
    int j;
    double *sum_end = sum + ncol;

    memset(sum, 0, sizeof(double) * ncol);
    for (; sum < sum_end; ++sum) {
        for (j = 0; j < nrow; ++j) {
            *sum += A[j];
        }
        A += nrow;
    }
}

static inline void rowSquareSums(const double *A,
                                 const int nrow, const int ncol,
                                 double *sum) {
    int i, j, block_size, block_size_i;
    const double *A_block = A;
    const double *A_end = A + nrow * ncol;

    if (nrow < CVE_MEM_CHUNK_SIZE) {
        block_size = nrow;
    } else {
        block_size = CVE_MEM_CHUNK_SIZE;
    }

    // Iterate `(block_size_i, ncol)` submatrix blocks.
    for (i = 0; i < nrow; i += block_size_i) {
        // Reset `A` to new block beginning.
        A = A_block;
        // Take block size of everything left and reduce to max size.
        block_size_i = nrow - i;
        if (block_size_i > block_size) {
            block_size_i = block_size;
        }
        // Compute the block's first column,
        for (j = 0; j < block_size_i; ++j) {
            sum[j] = A[j] * A[j];
        }
        // and sum the following columns onto it.
        for (A += nrow; A < A_end; A += nrow) {
            for (j = 0; j < block_size_i; ++j) {
                sum[j] += A[j] * A[j];
            }
        }
        // Step one block forth.
        A_block += block_size_i;
        sum += block_size_i;
    }
}

static inline void rowSumsSymVec(const double *Avec, const int nrow,
                                 const double *diag,
                                 double *sum) {
    int i, j;

    if (*diag == 0.0) {
        memset(sum, 0, nrow * sizeof(double));
    } else {
        for (i = 0; i < nrow; ++i) {
            sum[i] = *diag;
        }
    }

    for (j = 0; j < nrow; ++j) {
        for (i = j + 1; i < nrow; ++i, ++Avec) {
            sum[j] += *Avec;
            sum[i] += *Avec;
        }
    }
}

/* C[, j] = A[, j] * v for each j = 1 to ncol */
static void rowSweep(const double *A, const int nrow, const int ncol,
                     const char* op,
                     const double *v, // vector of length nrow
                     double *C) {
    int i, j, block_size, block_size_i;
    const double *A_block = A;
    double *C_block = C;
    const double *A_end = A + nrow * ncol;

    if (nrow > CVE_MEM_CHUNK_SMALL) { // small because 3 vectors in cache
        block_size = CVE_MEM_CHUNK_SMALL;
    } else {
        block_size = nrow;
    }

    if (*op == '+') {
        // Iterate `(block_size_i, ncol)` submatrix blocks.
        for (i = 0; i < nrow; i += block_size_i) {
            // Set `A` and `C` to block beginning.
            A = A_block;
            C = C_block;
            // Get current block's row size.
            block_size_i = nrow - i;
            if (block_size_i > block_size) {
                block_size_i = block_size;
            }
            // Perform element wise operation for block.
            for (; A < A_end; A += nrow, C += nrow) {
                for (j = 0; j < block_size_i; ++j) {
                    C[j] = A[j] + v[j]; // FUN = '+'
                }
            }
            // Step one block forth.
            A_block += block_size_i;
            C_block += block_size_i;
            v += block_size_i;
        }
    } else if (*op == '-') {
        // Iterate `(block_size_i, ncol)` submatrix blocks.
        for (i = 0; i < nrow; i += block_size_i) {
            // Set `A` and `C` to block beginning.
            A = A_block;
            C = C_block;
            // Get current block's row size.
            block_size_i = nrow - i;
            if (block_size_i > block_size) {
                block_size_i = block_size;
            }
            // Perform element wise operation for block.
            for (; A < A_end; A += nrow, C += nrow) {
                for (j = 0; j < block_size_i; ++j) {
                    C[j] = A[j] - v[j]; // FUN = '-'
                }
            }
            // Step one block forth.
            A_block += block_size_i;
            C_block += block_size_i;
            v += block_size_i;
        }
    } else if (*op == '*') {
        // Iterate `(block_size_i, ncol)` submatrix blocks.
        for (i = 0; i < nrow; i += block_size_i) {
            // Set `A` and `C` to block beginning.
            A = A_block;
            C = C_block;
            // Get current block's row size.
            block_size_i = nrow - i;
            if (block_size_i > block_size) {
                block_size_i = block_size;
            }
            // Perform element wise operation for block.
            for (; A < A_end; A += nrow, C += nrow) {
                for (j = 0; j < block_size_i; ++j) {
                    C[j] = A[j] * v[j]; // FUN = '*'
                }
            }
            // Step one block forth.
            A_block += block_size_i;
            C_block += block_size_i;
            v += block_size_i;
        }
    } else if (*op == '/') {
        // Iterate `(block_size_i, ncol)` submatrix blocks.
        for (i = 0; i < nrow; i += block_size_i) {
            // Set `A` and `C` to block beginning.
            A = A_block;
            C = C_block;
            // Get current block's row size.
            block_size_i = nrow - i;
            if (block_size_i > block_size) {
                block_size_i = block_size;
            }
            // Perform element wise operation for block.
            for (; A < A_end; A += nrow, C += nrow) {
                for (j = 0; j < block_size_i; ++j) {
                    C[j] = A[j] / v[j]; // FUN = '/'
                }
            }
            // Step one block forth.
            A_block += block_size_i;
            C_block += block_size_i;
            v += block_size_i;
        }
    } else {
        error("Got unknown 'op' (operation) argument.");
    }
}

static inline void matrixprod(const double *A, const int nrowA, const int ncolA,
                              const double *B, const int nrowB, const int ncolB,
                              double *C) {
    const double one = 1.0;
    const double zero = 0.0;

    // DGEMM with parameterization:
    //      C <- A %*% B
    F77_NAME(dgemm)("N", "N", &nrowA, &ncolB, &ncolA,
                    &one, A, &nrowA, B, &nrowB,
                    &zero, C, &nrowA);
}

static inline void crossprod(const double *A, const int nrowA, const int ncolA,
                             const double *B, const int nrowB, const int ncolB,
                             double *C) {
    const double one = 1.0;
    const double zero = 0.0;

    // DGEMM with parameterization:
    //      C <- t(A) %*% B
    F77_NAME(dgemm)("T", "N", &ncolA, &ncolB, &nrowA,
                    &one, A, &nrowA, B, &nrowB,
                    &zero, C, &ncolA);
}

static inline void nullProj(const double *B, const int nrowB, const int ncolB,
                            double *Q) {
    const double minusOne = -1.0;
    const double one = 1.0;

    // Initialize as identity matrix.
    memset(Q, 0, sizeof(double) * nrowB * nrowB);
    double *Q_diag, *Q_end = Q + nrowB * nrowB;
    for (Q_diag = Q; Q_diag < Q_end; Q_diag += nrowB + 1) {
        *Q_diag = 1.0;
    }

    // DGEMM with parameterization:
    //      C <- (-1.0 * B %*% t(B)) + C
    F77_NAME(dgemm)("N", "T", &nrowB, &nrowB, &ncolB,
                    &minusOne, B, &nrowB, B, &nrowB,
                    &one, Q, &nrowB);
}

static inline void rangePairs(const int from, const int to, int *pairs) {
    int i, j;
    for (i = from; i < to; ++i) {
        for (j = i + 1; j < to; ++j) {
            pairs[0] = i;
            pairs[1] = j;
            pairs += 2;
        }
    }
}

// A dense skew-symmetric rank 2 update.
// Perform the update
//      C := alpha (A * B^T - B * A^T) + beta C
static void skewSymRank2k(const int nrow, const int ncol,
                          double alpha, const double *A, const double *B,
                          double beta,
                          double *C) {
    F77_NAME(dgemm)("N", "T",
                    &nrow, &nrow, &ncol,
                    &alpha, A, &nrow, B, &nrow,
                    &beta, C, &nrow);

    alpha *= -1.0;
    beta = 1.0;
    F77_NAME(dgemm)("N", "T",
                    &nrow, &nrow, &ncol,
                    &alpha, B, &nrow, A, &nrow,
                    &beta, C, &nrow);
}

// TODO: much potential for optimization!!!
static inline void weightedYandLoss(const int n,
                                    const double *Y,
                                    const double *vecD,
                                    const double *vecW,
                                    const double *colSums,
                                    double *y1, double *L, double *vecS,
                                    double *const loss) {
    int i, j, k, N = n * (n - 1) / 2;
    double l;

    for (i = 0; i < n; ++i) {
        y1[i] = Y[i] / colSums[i];
        L[i] = Y[i] * Y[i] / colSums[i];
    }

    for (k = j = 0; j < n; ++j) {
        for (i = j + 1; i < n; ++i, ++k) {
            y1[i] += Y[j] * vecW[k] / colSums[i];
            y1[j] += Y[i] * vecW[k] / colSums[j];
            L[i] += Y[j] * Y[j] * vecW[k] / colSums[i];
            L[j] += Y[i] * Y[i] * vecW[k] / colSums[j];
        }
    }

    l = 0.0;
    for (i = 0; i < n; ++i) {
        l += (L[i] -= y1[i] * y1[i]);
    }
    *loss = l / (double)n;

    for (k = j = 0; j < n; ++j) {
        for (i = j + 1; i < n; ++i, ++k) {
            l = Y[j] - y1[i];
            vecS[k] = (L[i] - (l * l)) / colSums[i];
            l = Y[i] - y1[j];
            vecS[k] += (L[j] - (l * l)) / colSums[j];
        }
    }

    for (k = 0; k < N; ++k) {
        vecS[k] *= vecW[k] * vecD[k];
    }
}

static inline double gaussKernel(const double x, const double scale) {
    return exp(scale * x * x);
}
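/* Added note (not in the original source): with `scale = -0.5 / h` and `x`
 * being a squared projected distance, gaussKernel(x, scale) evaluates
 * exp(-x^2 / (2 h)), which matches the weights `exp((-0.5 / h) * vecD^2)`
 * used by the reference R implementation above. */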

static void gradient(const int n, const int p, const int q,
                     const double *X,
                     const double *X_diff,
                     const double *Y,
                     const double *V,
                     const double h,
                     double *G, double *const loss) {
    // Number of non-trivial (X_i, X_j) pairs.
    int i, N = (n * (n - 1)) / 2;
    double scale = -0.5 / h;
    const double one = 1.0;

    if (X_diff == (void*)0) {
        // TODO: ...
    }

    // Allocate and compute projection matrix `Q = I_p - V * V^T`
    double *Q = (double*)malloc(p * p * sizeof(double));
    nullProj(V, p, q, Q);

    // Allocate and compute vectorized distance matrix with a temporary
    // projection of `X_diff`.
    double *vecD = (double*)malloc(N * sizeof(double));
    double *X_proj;
    if (p < 5) { // TODO: refine that!
        X_proj = (double*)malloc(N * 5 * sizeof(double));
    } else {
        X_proj = (double*)malloc(N * p * sizeof(double));
    }
    matrixprod(X_diff, N, p, Q, p, p, X_proj);
    rowSquareSums(X_proj, N, p, vecD);

    // Apply kernel to the distance vector for the weights computation.
    double *vecW = X_proj; // reuse memory area, no longer needed.
    for (i = 0; i < N; ++i) {
        vecW[i] = gaussKernel(vecD[i], scale);
    }
    double *colSums = X_proj + N; // still allocated!
    rowSumsSymVec(vecW, n, &one, colSums); // rowSums = colSums cause Sym

    // Compute weighted responses of the first and second moment, aka y1, y2.
    double *y1 = X_proj + N + n;
    double *L = X_proj + N + (2 * n);
    // Allocate X_diff scaling vector `vecS`, not in the `X_proj` mem area
    // because it is used simultaneously with X_proj in the final gradient
    // computation.
    double *vecS = (double*)malloc(N * sizeof(double));
    weightedYandLoss(n, Y, vecD, vecW, colSums, y1, L, vecS, loss);

    // Compute the gradient using X_proj for the intermediate scaled X_diff.
    rowSweep(X_diff, N, p, "*", vecS, X_proj);
    // Reuse Q which has the required dim (p, p).
    crossprod(X_diff, N, p, X_proj, N, p, Q);
    // Product with V
    matrixprod(Q, p, p, V, p, q, G);
    // And final scaling (TODO: move into matrixprod!)
    scale = -2.0 / (((double)n) * h * h);
    N = p * q;
    for (i = 0; i < N; ++i) {
        G[i] *= scale;
    }

    free(vecS);
    free(X_proj);
    free(vecD);
    free(Q);
}
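/* Added note (not in the original source): `X_proj` is over-allocated on
 * purpose so it can be recycled as scratch space once the projection is no
 * longer needed: the first N entries hold the kernel weights `vecW`, followed
 * by n column sums, n weighted first moments `y1`, and n per-sample losses
 * `L`. The `p < 5` branch guarantees at least N * 5 doubles, which covers
 * N + 3 * n for any n >= 3. */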

@@ -0,0 +1,159 @@
#ifndef _CVE_INCLUDE_GUARD_
#define _CVE_INCLUDE_GUARD_

#include <Rinternals.h>

#define CVE_MEM_CHUNK_SMALL 1016
#define CVE_MEM_CHUNK_SIZE 2032

static inline void rowSums(const double *A,
                           const int nrow, const int ncol,
                           double *sum);
SEXP R_rowSums(SEXP A) {
    SEXP sums = PROTECT(allocVector(REALSXP, nrows(A)));

    rowSums(REAL(A), nrows(A), ncols(A), REAL(sums));

    UNPROTECT(1);
    return sums;
}

static inline void colSums(const double *A,
                           const int nrow, const int ncol,
                           double *sum);
SEXP R_colSums(SEXP A) {
    SEXP sums = PROTECT(allocVector(REALSXP, ncols(A)));

    colSums(REAL(A), nrows(A), ncols(A), REAL(sums));

    UNPROTECT(1);
    return sums;
}

static inline void rowSquareSums(const double*, const int, const int, double*);
SEXP R_rowSquareSums(SEXP A) {
    SEXP result = PROTECT(allocVector(REALSXP, nrows(A)));

    rowSquareSums(REAL(A), nrows(A), ncols(A), REAL(result));

    UNPROTECT(1);
    return result;
}

static inline void rowSumsSymVec(const double *Avec, const int nrow,
                                 const double *diag,
                                 double *sum);
SEXP R_rowSumsSymVec(SEXP Avec, SEXP nrow, SEXP diag) {
    SEXP sum = PROTECT(allocVector(REALSXP, *INTEGER(nrow)));

    rowSumsSymVec(REAL(Avec), *INTEGER(nrow), REAL(diag), REAL(sum));

    UNPROTECT(1);
    return sum;
}

static void rowSweep(const double *A, const int nrow, const int ncol,
                     const char* op,
                     const double *v, // vector of length nrow
                     double *C);
SEXP R_rowSweep(SEXP A, SEXP v, SEXP op) {
    SEXP C = PROTECT(allocMatrix(REALSXP, nrows(A), ncols(A)));

    rowSweep(REAL(A), nrows(A), ncols(A),
             CHAR(STRING_ELT(op, 0)),
             REAL(v), REAL(C));

    UNPROTECT(1);
    return C;
}

static inline void matrixprod(const double *A, const int nrowA, const int ncolA,
                              const double *B, const int nrowB, const int ncolB,
                              double *C);
SEXP R_matrixprod(SEXP A, SEXP B) {
    SEXP C = PROTECT(allocMatrix(REALSXP, nrows(A), ncols(B)));

    matrixprod(REAL(A), nrows(A), ncols(A),
               REAL(B), nrows(B), ncols(B),
               REAL(C));

    UNPROTECT(1);
    return C;
}

static inline void crossprod(const double *A, const int nrowA, const int ncolA,
                             const double *B, const int nrowB, const int ncolB,
                             double *C);
SEXP R_crossprod(SEXP A, SEXP B) {
    SEXP C = PROTECT(allocMatrix(REALSXP, ncols(A), ncols(B)));

    crossprod(REAL(A), nrows(A), ncols(A),
              REAL(B), nrows(B), ncols(B),
              REAL(C));

    UNPROTECT(1);
    return C;
}

static void skewSymRank2k(const int n, const int k,
                          double alpha, const double *A, const double *B,
                          double beta,
                          double *C);
SEXP R_skewSymRank2k(SEXP A, SEXP B, SEXP alpha, SEXP beta) {
    SEXP C = PROTECT(allocMatrix(REALSXP, nrows(A), nrows(A)));
    memset(REAL(C), 0, nrows(A) * nrows(A) * sizeof(double));

    skewSymRank2k(nrows(A), ncols(A),
                  *REAL(alpha), REAL(A), REAL(B),
                  *REAL(beta), REAL(C));

    UNPROTECT(1);
    return C;
}

static inline void nullProj(const double* B, const int nrowB, const int ncolB,
                            double* Q);
SEXP R_nullProj(SEXP B) {
    SEXP Q = PROTECT(allocMatrix(REALSXP, nrows(B), nrows(B)));

    nullProj(REAL(B), nrows(B), ncols(B), REAL(Q));

    UNPROTECT(1);
    return Q;
}

static inline void rangePairs(const int from, const int to, int *pairs);
SEXP R_rangePairs(SEXP from, SEXP to) {
    int start = asInteger(from);
    int end = asInteger(to) + 1;
    int n = end - start;

    SEXP out = PROTECT(allocMatrix(INTSXP, 2, n * (n - 1) / 2));
    rangePairs(start, end, INTEGER(out));

    UNPROTECT(1);
    return out;
}

static void gradient(const int n, const int p, const int q,
                     const double *X,
                     const double *X_diff,
                     const double *Y,
                     const double *V,
                     const double h,
                     double *G, double *const loss);
SEXP R_gradient(SEXP X, SEXP X_diff, SEXP Y, SEXP V, SEXP h) {
    int N = (nrows(X) * (nrows(X) - 1)) / 2;

    SEXP G = PROTECT(allocMatrix(REALSXP, nrows(V), ncols(V)));
    SEXP loss = PROTECT(allocVector(REALSXP, 1));

    gradient(nrows(X), ncols(X), ncols(V),
             REAL(X), REAL(X_diff), REAL(Y), REAL(V), *REAL(h),
             REAL(G), REAL(loss));

    UNPROTECT(2);
    return G;
}

#endif /* _CVE_INCLUDE_GUARD_ */