#
# Usage:
#   ~$ Rscript validate.R
#
# Compares the CVE implementation (compiled from 'cve_V1.cpp') against
# `MAVE::mave` on a set of simulated datasets. For every dataset, `nr.sim`
# independent samples are drawn; for each sample the sub-space estimation
# error and the elapsed run-time of both methods are recorded. Means are
# printed and boxplots are written to 'plots/Rplots_validate.pdf'.

# load MAVE package for comparison
library(MAVE)
# load (and compile) cve and dataset source
library(Rcpp)
cat("Compiling source 'cve_V1.cpp'\n")
Rcpp::sourceCpp('cve_V1.cpp', embeddedR = FALSE)
# load dataset sampler
source('CVE/R/datasets.R')

# set default nr of simulations
nr.sim <- 25

# output file for the plots
plot.file <- "plots/Rplots_validate.pdf"

#' Orthogonal projection to sub-space spanned by `B`
#'
#' Computes `B (B' B)^{-1} B'`. The linear system is solved directly instead
#' of forming the explicit inverse; `B' B` must be invertible (i.e. `B` needs
#' full column rank), otherwise `solve` raises an error.
#'
#' @param B Matrix
#' @return Orthogonal Projection Matrix
proj <- function(B) {
    B %*% solve(crossprod(B), t(B))
}

#' Compute nObs given dataset dimension \code{n}.
#'
#' @param n Number of samples
#' @return Numeric estimate of \code{nObs} (the square root of \code{n})
nObs <- function (n) {
    n^0.5
}

# dataset names
# NOTE(review): the original comment states that M4 is not implemented yet,
# but "M4" is still part of the list below -- confirm `dataset("M4")` works.
dataset.names <- c("M1", "M2", "M3", "M4", "M5")

## prepare "logging"
# One row per simulation run, one column per (method, dataset) combination.
col.names <- kronecker(c("CVE.", "MAVE."), dataset.names, paste0)
# result error, time, ... data.frame's
error <- matrix(nrow = nr.sim, ncol = 2 * length(dataset.names))
time <- matrix(nrow = nr.sim, ncol = 2 * length(dataset.names))
# convert to data.frames
error <- as.data.frame(error)
time <- as.data.frame(time)
# set names
names(error) <- col.names
names(time) <- col.names

# Make sure the output directory exists *before* running the (long)
# simulation, so a missing directory cannot discard all results at the end.
dir.create(dirname(plot.file), showWarnings = FALSE, recursive = TRUE)

# get current time
start.time <- Sys.time()
## main comparison loop (iterate `nr.sim` times for each dataset)
for (i in seq_along(dataset.names)) {
    name <- dataset.names[i]
    # `seq_len` (instead of `1:nr.sim`) yields an empty loop for `nr.sim = 0`.
    for (j in seq_len(nr.sim)) {
        # reporting progress ("\r" rewinds, "\033[K" clears the rest of line)
        cat("\rRunning Test (", name, j , "):",
            (i - 1) * nr.sim + j, "/", length(dataset.names) * nr.sim,
            " - Time since start:", format(Sys.time() - start.time),
            "\033[K")
        # create new dataset
        ds <- dataset(name)
        k <- ncol(ds$B) # real dim
        # call CVE
        cve.time <- system.time(
            cve.res <- cve_cpp(ds$X, ds$Y,
                               k = k,
                               nObs = nObs(nrow(ds$X)),
                               verbose = FALSE)
        )
        # call MAVE
        mave.time <- system.time(
            mave.res <- mave(Y ~ .,
                             data = data.frame(X = ds$X, Y = ds$Y),
                             method = "meanMAVE")
        )
        # compute real and approximated sub-space projections
        P <- proj(ds$B) # real
        P.cve <- proj(cve.res$B)
        P.mave <- proj(mave.res$dir[[k]])
        # compute (and store) errors: Frobenius norm of the difference of the
        # projections, scaled by sqrt(2 * k)
        error[j, paste0("CVE.", name)] <- norm(P - P.cve, 'F') / sqrt(2 * k)
        error[j, paste0("MAVE.", name)] <- norm(P - P.mave, 'F') / sqrt(2 * k)
        # store run-times
        time[j, paste0("CVE.", name)] <- cve.time["elapsed"]
        time[j, paste0("MAVE.", name)] <- mave.time["elapsed"]
    }
}

cat("\n\n## Time [sec] Means:\n")
print(colMeans(time))
cat("\n## Error Means:\n")
print(colMeans(error))

len <- length(dataset.names)
# box positions: the CVE group, a one-slot gap, then the MAVE group
box.positions <- c(1:len, 1:len + len + 1)
pdf(plot.file)
boxplot(as.matrix(error),
    main = paste0("Error (nr.sim = ", nr.sim, ")"),
    ylab = expression(error == group("||", P[B] - P[hat(B)], "||")[F] / sqrt(2*k)),
    las = 2,
    at = box.positions
)
boxplot(as.matrix(time),
    main = paste0("Time (nr.sim = ", nr.sim, ")"),
    ylab = "time [sec]",
    las = 2,
    at = box.positions
)
# `dev.off()` returns the new current device visibly; `invisible` keeps
# Rscript from auto-printing it (`suppressMessages` does not, since the
# printed value is not a message).
invisible(dev.off())
# report only after the device is closed, i.e. the file is fully written
cat("Plot saved to '", plot.file, "'\n", sep = "")