# CVE/simulations/predict_dim_compair.R

# Comparison of dimension prediction methods, including MAVE's
# cross-validation with meanMAVE as the reference basis.
#
# Results are written to a single CSV file with one row per simulation run;
# the 'dataset' column names the dataset the run was based on.
#
# Note:
#   All methods are called via interface functions defined in
#   'simulation_interfaces.R'. In addition this file provides a few convenience
#   functions.
source('simulation_interfaces.R')

# Number of simulation repetitions per dataset.
NR.SIM <- 100L
# Number of datasets; they are named "M1", ..., "M<NR.DS>".
NR.DS <- 7L
# Maximal dimension handed to the estimation methods as `max.dim`.
max.dim <- 5L

# List of dimension prediction methods.
methods <- c("cv", "elbow", "wilcoxon")

# Column layout of the result matrix: the dataset name, the true dimension,
# the estimate of MAVE's cross-validation and one estimate per method.
result.cols <- c("dataset", "true.dim", "mave.cv", methods)

# Build result data matrix with one row per simulation run. The number of
# columns is derived from the column names so both can never disagree (a
# mismatch makes the dimnames assignment fail).
# Note: the matrix is of type character since it holds the dataset names; all
# estimates written into it are stored as strings.
result <- matrix(NA_character_, nrow = NR.DS * NR.SIM,
                                ncol = length(result.cols),
                                dimnames = list(NULL, result.cols))
# Cycle through the dataset names such that each dataset occurs NR.SIM times.
result[, "dataset"] <- rep(paste0("M", seq_len(NR.DS)), NR.SIM)

# Create a progress logger, passing it the total number of simulation runs
# (presumably the number of reports it should expect -- see
# 'simulation_interfaces.R').
logger <- progress_logger(NR.DS * NR.SIM)
# One iteration per row of the result matrix, i.e. per simulation run.
for (sim in seq_len(nrow(result))) {
    # Create a new dataset for the dataset name stored in the first column and
    # extract its components by plain assignment (no `<<-` side effects).
    ds <- dataset(result[sim, 1L])
    X <- ds[["X"]]
    Y <- ds[["Y"]]
    B <- ds[["B"]]
    # Write true dr dimension (number of columns of B).
    result[sim, "true.dim"] <- ncol(B)

    # Report simulation progress to user.
    logger('')

    # Call CVE and let each dimension prediction method estimate.
    dr <- cve.call(X = X, Y = Y, max.dim = max.dim)
    for (method in methods) {
        result[sim, method] <- predict_dim(dr, method = method)$k
    }

    # The same using meanMAVE and MAVE's 'mave.dim'; the estimate is the
    # position of the smallest entry of its `cv` component.
    dr <- meanMAVE(X = X, Y = Y, max.dim = max.dim)
    result[sim, "mave.cv"] <- which.min(mave.dim(dr)$cv)

    # Explicit call to the garbage collector after each simulation run.
    gc()
}

# Time-stamped output path (without file extension) inside the 'results'
# directory below the current working directory.
path <- file.path(getwd(), "results",
    format(Sys.time(), "predict_dim_compair_%Y-%m-%dT%H%M%S"))
# Ensure the output directory exists; otherwise `write.csv` fails and the
# entire simulation result is lost.
dir.create(dirname(path), showWarnings = FALSE, recursive = TRUE)
# Write entire simulation results into a single file.
write.csv(result, file = paste0(path, ".csv"), row.names = FALSE)