2
0
Fork 0
CVE/simulations/runtime_test.R

129 lines
3.7 KiB
R
Raw Normal View History

2019-12-17 11:07:33 +00:00
# Usage:
# ~$ Rscript runtime_test.R
#' Draws its arguments as lines of text on an otherwise empty plot.
#'
#' All arguments are flattened into a single vector, one entry per line.
#' If there are more than 20 entries, only the first 17 and the last 2 are
#' drawn, separated by a marker line.
#' @param ... values to draw.
textplot <- function(...) {
    max.lines <- 20
    entries <- unlist(list(...))
    n <- length(entries)
    if (n > max.lines) {
        entries <- c(
            entries[seq_len(17)],
            ' ...... (skipped, text too long) ......',
            entries[n - 1:0]
        )
    }
    # Empty canvas on the unit square: no box, no axes, no axis labels.
    plot(NA, xlim = c(0, 1), ylim = c(0, 1),
         bty = 'n', xaxt = 'n', yaxt = 'n', xlab = '', ylab = '')
    # One entry per row, stepping down from the top in steps of 1 / 20;
    # `pos = 4` places the text to the right of the anchor point.
    for (row in seq_along(entries)) {
        text(0, 1 - (row / max.lines), entries[[row]], pos = 4)
    }
}
# library(CVEpureR) # load CVE's pure R implementation
library(CVE) # load CVE
#' Prints a one-line progress report to the console.
#'
#' The line starts with a carriage return and ends with the escape sequence
#' "\033[K" (ANSI erase to end of line), so on terminals that honour these
#' codes repeated calls rewrite the same console line.
#' @param name label of the test currently running.
#' @param start time stamp (as returned by `Sys.time()`) the run started at.
#' @param i number of the current step.
#' @param length total number of steps.
tell.user <- function(name, start, i, length) {
    elapsed <- format(Sys.time() - start)
    cat("\rRunning Test (", name, "):",
        i, "/", length,
        " - elapsed:", elapsed, "\033[K")
}
#' Distance between the subspaces spanned by two basis matrices.
#'
#' Builds the matrices `B1 B1'` and `B2 B2'` and measures how far apart
#' they are.
#' @param B1 Semi-orthonormal basis matrix
#' @param B2 Semi-orthonormal basis matrix
#' @return Frobenius norm of the difference of the two projection matrices.
subspace.dist <- function(B1, B2) {
    projections <- lapply(list(B1, B2), function(B) tcrossprod(B, B))
    norm(projections[[1]] - projections[[2]], type = 'F')
}
# Set random seed
set.seed(437)
# Number of simulations
SIM.NR <- 50L
# maximal number of iterations in curvilinear search algorithm
# NOTE(review): MAXIT is not referenced again in the visible part of this
#   script (in particular it is not passed to `cve.call`) -- confirm whether
#   it should be.
MAXIT <- 50L
# number of arbitrary starting values for curvilinear optimization
ATTEMPTS <- 10L
# set names of datasets
ds.names <- paste0("M", seq(7))
# Set used CVE method
methods <- c("simple", "weighted") # c("legacy", "simple", "linesearch", "sgd")
# Setup error and time tracking variables
# (one row per simulation, one column per dataset/method combination)
error <- matrix(NA, SIM.NR, length(methods) * length(ds.names))
time <- matrix(NA, SIM.NR, ncol(error))
# Column names are "<dataset>-<method>", with the method varying fastest.
colnames(error) <- kronecker(paste0(ds.names, '-'), methods, paste0)
colnames(time) <- colnames(error)

#' Finds a run number for which neither the log nor the pdf file exists yet.
#'
#' The search starts at the number of log files already present (the usual
#' next number) and moves upwards, so a gap left by a deleted log can never
#' lead to an existing file being overwritten.
#' @param dir directory holding the `test<nr>.log` / `test<nr>.pdf` files.
#' @return the first unused run number (integer).
next.log.nr <- function(dir) {
    nr <- length(list.files(dir, pattern = '.*\\.log'))
    while (file.exists(file.path(dir, paste0('test', nr, '.log')))
           || file.exists(file.path(dir, paste0('test', nr, '.pdf')))) {
        nr <- nr + 1L
    }
    nr
}

# Make sure the output directory exists, otherwise the log can not be opened.
dir.create('tmp', showWarnings = FALSE)
# Create new log file and write CSV (actually TSV) header.
# (do not overwrite existing logs)
log.nr <- next.log.nr('tmp')
file <- file.path('tmp', paste0('test', log.nr, '.log'))
cat('dataset\tmethod\terror\ttime\n', sep = '', file = file)
# Open a new pdf device for plotting into (do not overwrite existing ones)
path <- paste0('test', log.nr, '.pdf')
pdf(file.path('tmp', path))
cat('Plotting to file:', path, '\n')
# only for telling user (to stdout)
count <- 0
start <- Sys.time()
# Start simulation loop.
# (`seq_len` instead of `1:SIM.NR`, so SIM.NR = 0 runs no simulation at all)
for (sim in seq_len(SIM.NR)) {
    # Repeat for each dataset.
    for (name in ds.names) {
        count <- count + 1
        tell.user(name, start, count, SIM.NR * length(ds.names))
        # Create a new dataset
        ds <- dataset(name)
        # Prepare X, Y
        Y <- ds$Y
        X <- ds$X
        # get dimensions (number of columns of the dataset's basis `ds$B`)
        k <- ncol(ds$B)
        for (method in methods) {
            # Fit and measure the time of the fit in one go; the assignment
            # inside `system.time` keeps the fitted object available as `dr`.
            dr.time <- system.time(
                dr <- cve.call(X, Y,
                    method = method,
                    k = k,
                    attempts = ATTEMPTS
                )
            )
            dr$B <- coef(dr, k)
            key <- paste0(name, '-', method)
            # Distance of estimated and reference subspace, divided by
            # sqrt(2 k): for semi-orthonormal bases with k columns each the
            # Frobenius distance of the projections is at most sqrt(2 k).
            error[sim, key] <- subspace.dist(dr$B, ds$B) / sqrt(2 * k)
            time[sim, key] <- dr.time["elapsed"]
            # Log results to file (mostly for long running simulations)
            cat(paste0(
                    c(name, method, error[sim, key], time[sim, key]),
                    collapse = '\t'
                ), '\n',
                sep = '', file = file, append = TRUE
            )
        }
    }
}
# Console report: per column summaries of the recorded times and errors.
cat("\n\n## Time [sec] Summary:\n")
time.summary <- summary(time)
print(time.summary)
cat("\n## Error Summary:\n")
error.summary <- summary(error)
print(error.summary)
# One boxplot page per tracked quantity (errors first, then times), drawn on
# the currently active graphics device.
# NOTE(review): the visible part of the script never calls `dev.off()`;
#   presumably the device is left to be closed when the R session ends.
for (panel in list(
    list(values = error, title = "Error", ylab = "Error"),
    list(values = time, title = "Time", ylab = "Time [sec]")
)) {
    boxplot(panel$values,
        main = paste0(panel$title, " (Nr of simulations ", SIM.NR, ")"),
        ylab = panel$ylab,
        las = 2
    )
}