# Comparison of dimension prediction methods, including MAVE's
# cross-validation (via meanMAVE) as a basis for reference.
#
# Results are written to a single CSV file with one row per simulation run and
# the columns: dataset, true.dim, mave.cv and one column per entry of `methods`.
#
# Note:
#   The helper functions used below (dataset, progress_logger, cve.call,
#   predict_dim, meanMAVE, mave.dim) are not defined in this script; they are
#   expected to be provided by 'simulation_interfaces.R' (or by packages it
#   loads).
source('simulation_interfaces.R')

# Number of simulations per dataset, number of datasets, and the maximal
# dimension handed to the estimation routines.
NR.SIM <- 100L
NR.DS <- 7L
max.dim <- 5L

# List of dimension prediction methods.
methods <- c("cv", "elbow", "wilcoxon")

# Build the result matrix (each dataset name is repeated NR.SIM times).
# The number of columns is derived from the column names so both always agree:
# "dataset", "true.dim" and "mave.cv" plus one column per method.
# Note: since the dataset column holds names, the whole matrix is of type
# character.
columns <- c("dataset", "true.dim", "mave.cv", methods)
result <- matrix(NA, nrow = NR.DS * NR.SIM, ncol = length(columns),
                 dimnames = list(NULL, columns))
result[, "dataset"] <- rep(paste0("M", seq_len(NR.DS)), NR.SIM)

# Create a progress logger, constructed with the total number of runs.
logger <- progress_logger(NR.DS * NR.SIM)

for (sim in seq_len(nrow(result))) {
  # Create a new dataset for the dataset name stored in this row.
  ds <- dataset(result[sim, "dataset"])
  X <- ds[["X"]]
  Y <- ds[["Y"]]
  B <- ds[["B"]]

  # Write the true dr dimension (number of columns of B).
  result[sim, "true.dim"] <- ncol(B)

  # Report simulation progress to user.
  logger('')

  # Call CVE and let each dimension prediction method estimate.
  dr <- cve.call(X = X, Y = Y, max.dim = max.dim)
  for (method in methods) {
    result[sim, method] <- predict_dim(dr, method = method)$k
  }

  # The same using meanMAVE and MAVE's 'mave.dim'; the index of the smallest
  # 'cv' entry is taken as the estimated dimension.
  dr <- meanMAVE(X = X, Y = Y, max.dim = max.dim)
  result[sim, "mave.cv"] <- which.min(mave.dim(dr)$cv)

  # Explicit call to the garbage collector after every run.
  gc()
}

path <- paste0(getwd(), format(Sys.time(), '/results/predict_dim_compair_%Y-%m-%dT%H%M%S'))

# Make sure the output directory exists so the final write cannot fail because
# of a missing 'results' folder.
dir.create(dirname(path), showWarnings = FALSE, recursive = TRUE)

# Write entire simulation results into a single file.
write.csv(result, file = paste0(path, ".csv"), row.names = FALSE)