2019-08-30 19:16:52 +00:00
|
|
|
# Usage:
|
|
|
|
# ~$ Rscript runtime_test.R
|
2019-09-16 09:23:09 +00:00
|
|
|
# library(CVEpureR) # load CVE's pure R implementation
|
|
|
|
library(CVE) # load CVE
|
2019-08-30 19:16:52 +00:00
|
|
|
|
|
|
|
#' Writes log information to console. (to not get bored^^)
|
|
|
|
tell.user <- function(name, start.time, i, length) {
|
|
|
|
cat("\rRunning Test (", name, "):",
|
|
|
|
i, "/", length,
|
|
|
|
" - elapsed:", format(Sys.time() - start.time), "\033[K")
|
|
|
|
}
|
2019-11-20 18:03:21 +00:00
|
|
|
#' Computes "distance" of spanned subspaces.
|
|
|
|
#' @param B1 Semi-orthonormal basis matrix
|
|
|
|
#' @param B2 Semi-orthonormal basis matrix
|
|
|
|
#' @return Frobenius norm of subspace projection matrix diff.
|
2019-09-02 19:07:56 +00:00
|
|
|
subspace.dist <- function(B1, B2){
|
2019-11-20 18:03:21 +00:00
|
|
|
P1 <- tcrossprod(B1, B1)
|
|
|
|
P2 <- tcrossprod(B2, B2)
|
2019-09-02 19:07:56 +00:00
|
|
|
return(norm(P1 - P2, type = 'F'))
|
|
|
|
}
|
2019-08-30 19:16:52 +00:00
|
|
|
|
2019-11-20 18:03:21 +00:00
|
|
|
# Set random seed
|
|
|
|
set.seed(437)
|
|
|
|
|
2019-08-30 19:16:52 +00:00
|
|
|
# Number of simulations
|
2019-10-18 07:06:36 +00:00
|
|
|
SIM.NR <- 50
|
2019-08-30 19:16:52 +00:00
|
|
|
# maximal number of iterations in curvilinear search algorithm
|
|
|
|
MAXIT <- 50
|
|
|
|
# number of arbitrary starting values for curvilinear optimization
|
|
|
|
ATTEMPTS <- 10
|
|
|
|
# set names of datasets
|
|
|
|
dataset.names <- c("M1", "M2", "M3", "M4", "M5")
|
|
|
|
# Set used CVE method
|
2019-10-18 07:06:36 +00:00
|
|
|
methods <- c("simple") # c("legacy", "simple", "linesearch", "sgd")
|
2019-09-02 19:07:56 +00:00
|
|
|
|
|
|
|
if ("legacy" %in% methods) {
|
|
|
|
# Source legacy code (but only if needed)
|
|
|
|
source("CVE_legacy/function_script.R")
|
|
|
|
}
|
2019-08-30 19:16:52 +00:00
|
|
|
|
|
|
|
# Setup error and time tracking variables
|
|
|
|
error <- matrix(NA, SIM.NR, length(methods) * length(dataset.names))
|
|
|
|
time <- matrix(NA, SIM.NR, ncol(error))
|
|
|
|
colnames(error) <- kronecker(paste0(dataset.names, '-'), methods, paste0)
|
|
|
|
colnames(time) <- colnames(error)
|
|
|
|
|
|
|
|
# Create new log file and write CSV (actualy TSV) header.
|
|
|
|
# (do not overwrite existing logs)
|
|
|
|
log.nr <- length(list.files('tmp/', pattern = '.*\\.log'))
|
|
|
|
file <- file.path('tmp', paste0('test', log.nr, '.log'))
|
|
|
|
cat('dataset\tmethod\terror\ttime\n', sep = '', file = file)
|
|
|
|
# Open a new pdf device for plotting into (do not overwrite existing ones)
|
2019-10-18 07:06:36 +00:00
|
|
|
path <- paste0('test', log.nr, '.pdf')
|
|
|
|
pdf(file.path('tmp', path))
|
|
|
|
cat('Plotting to file:', path, '\n')
|
2019-08-30 19:16:52 +00:00
|
|
|
|
|
|
|
# only for telling user (to stdout)
|
|
|
|
count <- 0
|
|
|
|
start.time <- Sys.time()
|
|
|
|
# Start simulation loop.
|
|
|
|
for (sim in 1:SIM.NR) {
|
|
|
|
# Repeat for each dataset.
|
|
|
|
for (name in dataset.names) {
|
|
|
|
count <- count + 1
|
|
|
|
tell.user(name, start.time, count, SIM.NR * length(dataset.names))
|
|
|
|
|
|
|
|
# Create a new dataset
|
|
|
|
ds <- dataset(name)
|
|
|
|
# Prepare X, Y and combine to data matrix
|
|
|
|
Y <- ds$Y
|
|
|
|
X <- ds$X
|
|
|
|
data <- cbind(Y, X)
|
|
|
|
# get dimensions
|
|
|
|
dim <- ncol(X)
|
|
|
|
truedim <- ncol(ds$B)
|
|
|
|
|
|
|
|
for (method in methods) {
|
|
|
|
if (tolower(method) == "legacy") {
|
|
|
|
dr.time <- system.time(
|
2019-11-20 18:03:21 +00:00
|
|
|
dr <- stiefel_opt(data,
|
2019-08-30 19:16:52 +00:00
|
|
|
k = dim - truedim,
|
|
|
|
k0 = ATTEMPTS,
|
2019-11-20 18:03:21 +00:00
|
|
|
h = estimate.bandwidth(X,
|
|
|
|
k = truedim,
|
|
|
|
nObs = sqrt(nrow(X))),
|
2019-08-30 19:16:52 +00:00
|
|
|
maxit = MAXIT
|
|
|
|
)
|
|
|
|
)
|
|
|
|
dr$B <- fill_base(dr$est_base)[, 1:truedim]
|
|
|
|
} else {
|
|
|
|
dr.time <- system.time(
|
|
|
|
dr <- cve.call(X, Y,
|
|
|
|
method = method,
|
|
|
|
k = truedim,
|
|
|
|
attempts = ATTEMPTS
|
|
|
|
)
|
|
|
|
)
|
2019-11-20 18:03:21 +00:00
|
|
|
dr$B <- basis(dr, truedim)
|
2019-08-30 19:16:52 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
key <- paste0(name, '-', method)
|
2019-09-02 19:07:56 +00:00
|
|
|
error[sim, key] <- subspace.dist(dr$B, ds$B) / sqrt(2 * truedim)
|
2019-08-30 19:16:52 +00:00
|
|
|
time[sim, key] <- dr.time["elapsed"]
|
|
|
|
|
|
|
|
# Log results to file (mostly for long running simulations)
|
|
|
|
cat(paste0(
|
|
|
|
c(name, method, error[sim, key], time[sim, key]),
|
|
|
|
collapse = '\t'
|
|
|
|
), '\n',
|
|
|
|
sep = '', file = file, append = TRUE
|
|
|
|
)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-11-20 18:03:21 +00:00
|
|
|
cat("\n\n## Time [sec] Summary:\n")
|
|
|
|
print(summary(time))
|
|
|
|
cat("\n## Error Summary:\n")
|
|
|
|
print(summary(error))
|
2019-08-30 19:16:52 +00:00
|
|
|
|
|
|
|
boxplot(error,
|
|
|
|
main = paste0("Error (Nr of simulations ", SIM.NR, ")"),
|
|
|
|
ylab = "Error",
|
2019-11-20 18:03:21 +00:00
|
|
|
las = 2
|
2019-08-30 19:16:52 +00:00
|
|
|
)
|
|
|
|
boxplot(time,
|
|
|
|
main = paste0("Time (Nr of simulations ", SIM.NR, ")"),
|
|
|
|
ylab = "Time [sec]",
|
2019-11-20 18:03:21 +00:00
|
|
|
las = 2
|
2019-08-30 19:16:52 +00:00
|
|
|
)
|
|
|
|
|