2
0
Fork 0
CVE/validate.R

105 lines
3.2 KiB
R

#
# Benchmark script: compares the CVE implementation against MAVE on simulated
# datasets, prints mean run times / errors and writes box plots to a PDF.
#
# Usage:
# ~$ Rscript validate.R
#
# NOTE(review): 'cve_V1.cpp' and 'CVE/R/datasets.R' are both resolved relative
# to the current working directory -- confirm that both paths are valid from
# the directory the script is launched in.
# load MAVE package for comparison
library(MAVE)
# load (and compile) cve and dataset source
library(Rcpp)
cat("Compiling source 'cve_V1.cpp'\n")
# `embeddedR = FALSE`: do not run R code chunks embedded in the C++ source.
# The compiled file is expected to export `cve_cpp()` used further below.
Rcpp::sourceCpp('cve_V1.cpp', embeddedR = FALSE)
# load dataset sampler (expected to define `dataset()` used further below)
source('CVE/R/datasets.R')
# set default nr of simulations (runs per dataset)
nr.sim <- 25
#' Orthogonal projection onto the sub-space spanned by the columns of `B`
#'
#' Computes \eqn{P = B (B^T B)^{-1} B^T}. Instead of forming the explicit
#' inverse of \eqn{B^T B}, the linear system \eqn{(B^T B) Z = B^T} is solved
#' directly, which is cheaper and numerically more stable.
#'
#' @param B Numeric matrix whose columns span the sub-space. The columns must
#'   be linearly independent, otherwise `solve()` signals an error because
#'   `crossprod(B)` is singular.
#' @return Orthogonal projection matrix of dimension `nrow(B)` x `nrow(B)`.
proj <- function(B) {
  # crossprod(B) == t(B) %*% B; solve(A, b) avoids computing A^-1 explicitly
  B %*% solve(crossprod(B), t(B))
}
#' Derive the `nObs` value from the sample size.
#'
#' The value is the sample size raised to the power 0.5 (its square root).
#'
#' @param n Number of samples
#' @return Numeric value `n^0.5` to be used as \code{nObs}
nObs <- function(n) {
  exponent <- 0.5
  n^exponent
}
# names of the datasets to simulate
dataset.names <- c("M1", "M2", "M3", "M4", "M5") # M4 not implemented yet
## prepare result "logging"
# One row per simulation run, one column per (method, dataset) pair. The table
# starts out filled with NA and is completed by the main loop.
error <- as.data.frame(
  matrix(NA, nrow = nr.sim, ncol = 2 * length(dataset.names))
)
# column order: CVE.M1, ..., CVE.M5, MAVE.M1, ..., MAVE.M5
names(error) <- paste0(
  rep(c("CVE.", "MAVE."), each = length(dataset.names)),
  dataset.names
)
# run-time table has exactly the same (still empty) layout
time <- error
# reference point for the "time since start" progress output
start.time <- Sys.time()
## main comparison loop (iterate `nr.sim` times for each dataset)
# For every dataset a fresh sample is drawn `nr.sim` times; CVE and MAVE are
# both run on it and their subspace estimation error and elapsed time are
# stored in the `error` / `time` tables.
for (i in seq_along(dataset.names)) {
  # dataset name and result columns do not change within the inner loop
  name <- dataset.names[i]
  cve.col <- paste0("CVE.", name)
  mave.col <- paste0("MAVE.", name)
  # seq_len() gives an empty sequence for nr.sim = 0 (1:0 would be c(1, 0))
  for (j in seq_len(nr.sim)) {
    # reporting progress ("\r" rewrites the line, "\033[K" clears its rest)
    cat("\rRunning Test (", name, j , "):",
        (i - 1) * nr.sim + j, "/", length(dataset.names) * nr.sim,
        " - Time since start:", format(Sys.time() - start.time), "\033[K")
    # create new dataset (uses the components X, Y and B of the result)
    ds <- dataset(name)
    k <- ncol(ds$B) # real dim
    # call CVE; the assignment inside system.time() is evaluated in the
    # calling environment, so `cve.res` is available afterwards
    cve.time <- system.time(
      cve.res <- cve_cpp(ds$X, ds$Y,
                         k = k,
                         nObs = nObs(nrow(ds$X)),
                         verbose = FALSE)
    )
    # call MAVE
    mave.time <- system.time(
      mave.res <- mave(Y ~ .,
                       data = data.frame(X = ds$X, Y = ds$Y),
                       method = "meanMAVE")
    )
    # compute real and approximated sub-space projections
    P <- proj(ds$B) # real
    P.cve <- proj(cve.res$B)
    P.mave <- proj(mave.res$dir[[k]]) # MAVE estimate for dimension k
    # compute (and store) errors: Frobenius distance of the projections,
    # scaled by sqrt(2 * k)
    error[j, cve.col] <- norm(P - P.cve, 'F') / sqrt(2 * k)
    error[j, mave.col] <- norm(P - P.mave, 'F') / sqrt(2 * k)
    # store run-times (wall-clock seconds)
    time[j, cve.col] <- cve.time["elapsed"]
    time[j, mave.col] <- mave.time["elapsed"]
  }
}
## report: print column means and write box plots of errors and run-times
cat("\n\n## Time [sec] Means:\n")
print(colMeans(time))
cat("\n## Error Means:\n")
print(colMeans(error))
len <- length(dataset.names)
# box positions: first group at 1..len, second group shifted by one extra
# slot so that a gap separates the CVE boxes from the MAVE boxes
box.at <- c(seq_len(len), seq_len(len) + len + 1)
# make sure the output directory exists; otherwise pdf() fails and the results
# of the whole simulation run are lost (no-op if the directory already exists)
dir.create("plots", showWarnings = FALSE, recursive = TRUE)
pdf("plots/Rplots_validate.pdf")
boxplot(as.matrix(error),
  main = paste0("Error (nr.sim = ", nr.sim, ")"),
  ylab = expression(
    error == group("||", P[B] - P[hat(B)], "||")[F] / sqrt(2*k)
  ),
  las = 2,
  at = box.at
)
boxplot(as.matrix(time),
  main = paste0("Time (nr.sim = ", nr.sim, ")"),
  ylab = "time [sec]",
  las = 2,
  at = box.at
)
# Close the device before announcing the file: the PDF is only complete after
# dev.off(). invisible() hides the returned device id, which would otherwise
# be auto-printed (suppressMessages() only muffles message conditions).
invisible(dev.off())
cat("Plot saved to 'plots/Rplots_validate.pdf'\n")