2
0
Fork 0
CVE/simulations/predict_dim_compair.R

55 lines
1.8 KiB
R
Raw Normal View History

2019-12-17 11:07:33 +00:00
# Comparison of dimension prediction methods, including MAVE's cross-validation
# with meanMAVE as the basis for reference.
#
# Results are written to a single CSV file with one row per simulation run: a
# dataset column, the true dimension, and one column per prediction method.
#
# Note:
# All methods are called via interface functions defined in
# 'simulation_interfaces.R'. In addition this file provides a few convenience
# functions.
source('simulation_interfaces.R')
# Number of simulations per dataset, number of datasets ("M1", "M2", ...) and
# the `max.dim` value handed to the estimation routines.
NR.SIM <- 100L
NR.DS <- 7L
max.dim <- 5L

# List of dimension prediction methods.
methods <- c("cv", "elbow", "wilcoxon")

# Build the result matrix with one row per (dataset, simulation) pair.
# The number of columns is derived from the column names so the two can never
# disagree (a mismatch makes the dimnames assignment fail).
col.names <- c("dataset", "true.dim", "mave.cv", methods)
result <- matrix(NA, nrow = NR.DS * NR.SIM, ncol = length(col.names),
                 dimnames = list(NULL, col.names))
# Assigning the dataset names turns `result` into a character matrix; all
# values stored later are therefore kept as strings.
result[, "dataset"] <- rep(paste0("M", seq_len(NR.DS)), NR.SIM)

# Create a progress logger for length many reports.
logger <- progress_logger(NR.DS * NR.SIM)

for (sim in seq_len(nrow(result))) {
  # Create new dataset.
  ds <- dataset(result[sim, "dataset"])
  X <- ds[["X"]]
  Y <- ds[["Y"]]
  B <- ds[["B"]]

  # Write true dr dimension.
  result[sim, "true.dim"] <- ncol(B)

  # Report simulation progress to user.
  logger("")

  # Call CVE and let each dimension prediction method estimate.
  dr <- cve.call(X = X, Y = Y, max.dim = max.dim)
  for (method in methods) {
    result[sim, method] <- predict_dim(dr, method = method)$k
  }

  # The same using meanMAVE and MAVE's 'mave.dim'.
  dr <- meanMAVE(X = X, Y = Y, max.dim = max.dim)
  result[sim, "mave.cv"] <- which.min(mave.dim(dr)$cv)

  # explicit call to the garbage collector.
  gc()
}

# Time-stamped output path (without extension) below '<working dir>/results'.
path <- file.path(getwd(), "results",
                  format(Sys.time(), "predict_dim_compair_%Y-%m-%dT%H%M%S"))
# Make sure the output directory exists, so the finished simulation is not lost
# to a failing write.
dir.create(dirname(path), showWarnings = FALSE, recursive = TRUE)

# Write entire simulation results into a single file.
write.csv(result, file = paste0(path, ".csv"), row.names = FALSE)