CVE/runtime_test.R

# Usage:
# ~$ Rscript runtime_test.R

#' Prints a one-line progress report to the console.
#'
#' The line begins with a carriage return and ends with the ANSI
#' "erase to end of line" sequence, so consecutive calls overwrite each other
#' instead of scrolling the terminal.
#'
#' @param name label shown in parentheses.
#' @param start.time time stamp from which the elapsed time is measured.
#' @param i index of the current step.
#' @param length total number of steps.
tell.user <- function(name, start.time, i, length) {
    elapsed <- format(Sys.time() - start.time)
    cat("\rRunning Test (", name, "):", i, "/", length,
        " - elapsed:", elapsed, "\033[K",
        sep = " ")
}
#' Distance between the column spaces of two matrices.
#'
#' Builds the orthogonal projection matrices onto span(B1) and span(B2) and
#' returns the Frobenius norm of their difference.
#'
#' @param B1,B2 numeric matrices with the same number of rows and linearly
#'  independent columns (a plain vector is treated as a single column).
#' @return non-negative scalar; zero (up to rounding) when both column spaces
#'  coincide.
subspace.dist <- function(B1, B2){
    B1 <- as.matrix(B1)
    B2 <- as.matrix(B2)
    # Solve the normal equations against t(B) directly instead of forming the
    # explicit inverse of t(B) %*% B: cheaper and numerically better behaved.
    P1 <- B1 %*% solve(crossprod(B1), t(B1))
    P2 <- B2 %*% solve(crossprod(B2), t(B2))
    return(norm(P1 - P2, type = 'F'))
}

# How many simulation runs are performed
SIM.NR <- 20
# upper bound for the iterations of the curvilinear search algorithm
MAXIT <- 50
# how many arbitrary starting values the curvilinear optimization tries
ATTEMPTS <- 10
# names of the datasets to simulate
dataset.names <- c("M1", "M2", "M3", "M4", "M5")
# CVE methods to run
methods <- c("simple") # c("legacy", "simple", "sgd", "linesearch")

library(CVE) # load CVE
# The legacy implementation is sourced only when it was requested.
if (is.element("legacy", methods)) {
    source("CVE_legacy/function_script.R")
}

# Result containers: one row per simulation run, one column per
# dataset/method pair. Columns are labelled "<dataset>-<method>", with the
# methods varying fastest within each dataset.
error <- matrix(NA,
    nrow = SIM.NR,
    ncol = length(methods) * length(dataset.names)
)
colnames(error) <- paste0(
    rep(dataset.names, each = length(methods)), '-',
    rep(methods, times = length(dataset.names))
)
# The timings start out as an identical, still empty, copy of the errors.
time <- error

# Create a new log file and write the CSV (actually TSV) header.
# Existing logs must never be overwritten.
# The output directory is created on demand so the script also runs in a
# fresh checkout.
dir.create('tmp', showWarnings = FALSE, recursive = TRUE)
# Start at the number of logs already present, then advance to the first index
# that is unused by both the log and the pdf. Counting alone is not enough:
# once an older log has been deleted, the count points at an index that is
# still taken.
log.nr <- length(list.files('tmp', pattern = '\\.log$'))
while (any(file.exists(
    file.path('tmp', paste0('test', log.nr, c('.log', '.pdf')))
))) {
    log.nr <- log.nr + 1L
}
file <- file.path('tmp', paste0('test', log.nr, '.log'))
cat('dataset\tmethod\terror\ttime\n', sep = '', file = file)
# Open a new pdf device for plotting into (same index as the log file, so it
# does not overwrite an existing one either)
pdf(file.path('tmp', paste0('test', log.nr, '.pdf')))

# only for telling user (to stdout)
count <- 0
start.time <- Sys.time()
# Start simulation loop.
# seq_len() (instead of 1:SIM.NR) makes the loop a no-op for SIM.NR == 0
# rather than iterating over c(1, 0).
for (sim in seq_len(SIM.NR)) {
    # Repeat for each dataset.
    for (name in dataset.names) {
        count <- count + 1
        tell.user(name, start.time, count, SIM.NR * length(dataset.names))

        # Create a new dataset
        ds <- dataset(name)
        # Prepare X, Y and combine to data matrix (response in first column)
        Y <- ds$Y
        X <- ds$X
        data <- cbind(Y, X)
        # get dimensions: number of predictors and number of columns of the
        # reference basis ds$B
        dim <- ncol(X)
        truedim <- ncol(ds$B)

        for (method in methods) {
            if (tolower(method) == "legacy") {
                # Legacy implementation; stiefl_opt and fill_base are
                # presumably provided by the sourced legacy script - confirm.
                dr.time <- system.time(
                    dr <- stiefl_opt(data,
                        k = dim - truedim,
                        k0 = ATTEMPTS,
                        h = estimate.bandwidth(X, k = truedim, nObs = sqrt(nrow(X))),
                        maxit = MAXIT
                    )
                )
                dr$B <- fill_base(dr$est_base)[, 1:truedim]
            } else {
                dr.time <- system.time(
                    dr <- cve.call(X, Y,
                        method = method,
                        k = truedim,
                        attempts = ATTEMPTS
                    )
                )
                # NOTE(review): assumes the result is a list indexed by
                # dimension - confirm against the CVE package.
                dr <- dr[[truedim]]
            }

            # Column label, must match the column names of `error` and `time`.
            key <- paste0(name, '-', method)
            # sqrt(2 * truedim) is the largest possible Frobenius distance
            # between two projections of rank truedim, so the error is scaled
            # to [0, 1].
            error[sim, key] <- subspace.dist(dr$B, ds$B) / sqrt(2 * truedim)
            time[sim, key] <- dr.time["elapsed"]

            # Log results to file (mostly for long running simulations)
            cat(paste0(
                    c(name, method, error[sim, key], time[sim, key]),
                    collapse = '\t'
                ), '\n',
                sep = '', file = file, append = TRUE
            )
        }
    }
}

# Print the per-column means of the recorded timings and errors.
cat("\n\n## Time [sec] Means:\n")
print(colMeans(time))
cat("\n## Error Means:\n")
print(colMeans(error))

# Box positions: consecutive slots within a dataset, with one empty slot
# between the groups of different datasets.
at <- seq_len(ncol(error)) +
    rep(seq_len(ncol(error) / length(methods)) - 1, each = length(methods))
boxplot(error,
    main = paste0("Error (Nr of simulations ", SIM.NR, ")"),
    ylab = "Error",
    las = 2,
    at = at
)
boxplot(time,
    main = paste0("Time (Nr of simulations ", SIM.NR, ")"),
    ylab = "Time [sec]",
    las = 2,
    at = at
)
# Close the graphics device so the plots are flushed to disk even when the
# script is source()d into a running session instead of run via Rscript.
invisible(dev.off())