# Usage:
# ~$ Rscript runtime_test.R

# library(CVEpureR) # load CVE's pure R implementation
library(CVE) # load CVE

#' Writes progress information to the console (to not get bored^^).
#'
#' The output starts with a carriage return and ends with the ANSI
#' "erase to end of line" sequence, so consecutive calls overwrite the same
#' console line instead of scrolling.
#'
#' @param name Name of the dataset currently processed.
#' @param start.time Start time of the simulation (a `Sys.time()` value).
#' @param i Number of the current run.
#' @param length Total number of runs.
tell.user <- function(name, start.time, i, length) {
    cat("\rRunning Test (", name, "):",
        i, "/", length,
        " - elapsed:", format(Sys.time() - start.time), "\033[K")
}

#' Distance between the subspaces spanned by the columns of two matrices.
#'
#' Builds the orthogonal projection `B (B' B)^-1 B'` for each matrix and
#' returns the Frobenius norm of the difference of the two projections.
#'
#' @param B1 Matrix whose columns span the first subspace (`t(B1) %*% B1`
#'  must be invertible).
#' @param B2 Matrix whose columns span the second subspace, with the same
#'  number of rows as `B1`.
#' @return Frobenius norm of the projection difference (a single number).
subspace.dist <- function(B1, B2) {
    P1 <- B1 %*% solve(t(B1) %*% B1) %*% t(B1)
    P2 <- B2 %*% solve(t(B2) %*% B2) %*% t(B2)
    norm(P1 - P2, type = 'F')
}

# Number of simulations
SIM.NR <- 20
# maximal number of iterations in curvilinear search algorithm
MAXIT <- 50
# number of arbitrary starting values for curvilinear optimization
ATTEMPTS <- 10
# set names of datasets
dataset.names <- c("M1", "M2", "M3", "M4", "M5")
# Set used CVE method
methods <- c("simple") # c("legacy", "simple", "sgd", "linesearch")

if ("legacy" %in% methods) {
    # Source legacy code (but only if needed)
    source("CVE_legacy/function_script.R")
}

# Setup error and time tracking variables, one row per simulation and one
# column per "<dataset>-<method>" combination.
error <- matrix(NA, SIM.NR, length(methods) * length(dataset.names))
time <- matrix(NA, SIM.NR, ncol(error))
colnames(error) <- kronecker(paste0(dataset.names, '-'), methods, paste0)
colnames(time) <- colnames(error)

# Create new log file and write CSV (actually TSV) header.
# (do not overwrite existing logs)
# The output directory is created on demand; without it the first write fails.
dir.create('tmp', showWarnings = FALSE)
log.nr <- length(list.files('tmp/', pattern = '.*\\.log'))
# Counting existing logs is not enough once an older log got deleted, so
# advance the number until neither the log nor the pdf name is taken.
while (any(file.exists(
    file.path('tmp', paste0('test', log.nr, c('.log', '.pdf')))
))) {
    log.nr <- log.nr + 1
}
file <- file.path('tmp', paste0('test', log.nr, '.log'))
cat('dataset\tmethod\terror\ttime\n', sep = '', file = file)
# Open a new pdf device for plotting into (do not overwrite existing ones)
pdf(file.path('tmp', paste0('test', log.nr, '.pdf')))

# only for telling user (to stdout)
count <- 0
start.time <- Sys.time()
# Start simulation loop.
for (sim in seq_len(SIM.NR)) {
    # Repeat for each dataset.
    for (name in dataset.names) {
        count <- count + 1
        tell.user(name, start.time, count, SIM.NR * length(dataset.names))

        # Create a new dataset
        ds <- dataset(name)
        # Prepare X, Y and combine to data matrix
        Y <- ds$Y
        X <- ds$X
        data <- cbind(Y, X)
        # get dimensions
        dim <- ncol(X)
        truedim <- ncol(ds$B)

        for (method in methods) {
            if (tolower(method) == "legacy") {
                # Legacy implementation (sourced above), timed as a whole.
                dr.time <- system.time(
                    dr <- stiefl_opt(data,
                        k = dim - truedim,
                        k0 = ATTEMPTS,
                        h = estimate.bandwidth(X,
                            k = truedim,
                            nObs = sqrt(nrow(X))),
                        maxit = MAXIT
                    )
                )
                dr$B <- fill_base(dr$est_base)[, 1:truedim]
            } else {
                dr.time <- system.time(
                    dr <- cve.call(X, Y,
                        method = method,
                        k = truedim,
                        attempts = ATTEMPTS
                    )
                )
                # Pick the entry of the result indexed by `truedim`.
                dr <- dr[[truedim]]
            }

            key <- paste0(name, '-', method)
            # Scale by sqrt(2 * truedim), the largest Frobenius distance
            # between two projections of rank `truedim` (assumes `dr$B` has
            # `truedim` linearly independent columns).
            error[sim, key] <- subspace.dist(dr$B, ds$B) / sqrt(2 * truedim)
            time[sim, key] <- dr.time["elapsed"]

            # Log results to file (mostly for long running simulations)
            cat(paste0(
                    c(name, method, error[sim, key], time[sim, key]),
                    collapse = '\t'
                ), '\n',
                sep = '', file = file, append = TRUE
            )
        }
    }
}

cat("\n\n## Time [sec] Means:\n")
print(colMeans(time))
cat("\n## Error Means:\n")
print(colMeans(error))

# Box positions: consecutive within a dataset, with one empty slot between
# the method groups of neighbouring datasets.
at <- seq(ncol(error)) +
    rep(seq(ncol(error) / length(methods)) - 1, each = length(methods))
boxplot(error,
    main = paste0("Error (Nr of simulations ", SIM.NR, ")"),
    ylab = "Error",
    las = 2,
    at = at
)
boxplot(time,
    main = paste0("Time (Nr of simulations ", SIM.NR, ")"),
    ylab = "Time [sec]",
    las = 2,
    at = at
)
# NOTE(review): the pdf device opened above is never closed explicitly; this
# presumably relies on R closing devices when the Rscript process exits.
# Consider a `dev.off()` once no further plots follow.