library(tensorPredictors) suppressPackageStartupMessages({ library(ggplot2) }) ################################################################################ ### Loading EEG Data ### ################################################################################ # Load as 3D predictors `X` and flat response `y` c(X, y) %<-% local({ # Load from file ds <- readRDS("eeg_data.rds") # Dimension values n <- nrow(ds) # sample size (nr. of people) p <- 64L # nr. of predictors (count of sensorce) t <- 256L # nr. of time points (measurements) # Extract dimension names nNames <- ds$PersonID tNames <- as.character(seq(t)) pNames <- unlist(strsplit(colnames(ds)[2 + t * seq(p)], "_"))[c(TRUE, FALSE)] # Split into predictors (with proper dims and names) and response X <- array(as.matrix(ds[, -(1:2)]), dim = c(person = n, time = t, sensor = p), dimnames = list(person = nNames, time = tNames, sensor = pNames) ) y <- ds$Case_Control list(X, y) }) ################################################################################ ### LOO-CV for Multiple Methods ### ################################################################################ # compatibility wrapper for function implemented with the "old" API toNewAPI <- function(func) { function(...) { res <- func(...) list(alphas = list(res$beta, res$alpha)) } } # Number of (2D)^2 PCA components per axis npcs <- list(c(3, 4), c(15, 15), c(20, 30), dim(X)[-1]) # setup methods for simulation (with unified API) methods <- list( hopca = list( fun = function(X, Fy) list(alphas = hopca(X, npc = c(1L, 1L), 1L)), is.applicable = function(npc) all(npc == c(256L, 64L)) # NOT reduced ), kpir.base = list( fun = toNewAPI(kpir.base), is.applicable = function(npc) prod(npc) < 100 ), kpir.new.vlp = list( fun = toNewAPI(function(X, Fy) kpir.new(X, Fy, init.method = "vlp")), is.applicable = function(npc) prod(npc) < 100 ), kpir.new.ls = list( fun = toNewAPI(function(X, Fy) kpir.new(X, Fy, init.method = "ls")), is.applicable = function(npc) prod(npc) < 100 ), kpir.ls = list( fun = kpir.ls, is.applicable = function(npc) TRUE ), LSIR = list( fun = function(X, Fy) { res <- LSIR(matrix(X, nrow(X)), Fy, dim(X)[2], dim(X)[3]) list(alphas = list(res$beta, res$alpha)) }, is.applicable = function(npc) prod(npc) < 1000 ), kpir.momentum.vlp = list( fun = toNewAPI(function(X, Fy) kpir.momentum(X, Fy, init.method = "vlp")), is.applicable = function(npc) prod(npc) < 100 ), kpir.momentum.ls = list( fun = toNewAPI(function(X, Fy) kpir.momentum(X, Fy, init.method = "ls")), is.applicable = function(npc) prod(npc) < 100 ), kpir.approx.vlp = list( fun = toNewAPI(function(X, Fy) kpir.approx(X, Fy, init.method = "vlp")), is.applicable = function(npc) prod(npc) < 100 ), kpir.approx.ls = list( fun = toNewAPI(function(X, Fy) kpir.approx(X, Fy, init.method = "ls")), is.applicable = function(npc) TRUE ) ) # define AUC for reporting while simulation is running auc <- function(y_true, y_pred) pROC::roc(y_true, y_pred, quiet = TRUE)$auc[1] # file to dump simulation results log.file <- format(Sys.time(), "eeg_sim_%Y%m%dT%H%M.rds") # init complete simulation as empty sim <- NULL for (npc in npcs) { # check if any PC count is smaller than the axis if (any(npc < dim(X)[-1])) { # Reduce dimensions using (2D)^2 PCA, which is a special case of the Higher # Order Principal Component Analysis pcs <- hopca(X, npc = npc, sample.axis = 1) # Reduce dimensions X.pc <- mlm(X, Map(t, pcs), modes = 2:3) } else { # No reduction X.pc <- X } for (name in names(methods)) { # check if method can be applied to current reduction dimensions if (!methods[[name]]$is.applicable(npc)) { next } # extract method to be applied method <- methods[[name]]$fun # report name of current simulation method cat(sprintf("npc: (t = %d, p = %d), method: %s\n", npc[1], npc[2], name)) # Leave-One-Out Cross-Validation loo.cv <- data.frame( y_true = y, y_pred = NA, # CV responses elapsed = NA, sys.self = NA, user.self = NA # execution time ) for (i in seq_len(nrow(X.pc))) { # report progress cat(sprintf("\r%3d/%d", i, nrow(X.pc))) # Split into training/test data X.train <- X.pc[-i, , ] y.train <- scale(y[-i], scale = FALSE) X.test <- X.pc[i, , , drop = FALSE] y.test <- scale(y[i], center = attr(y.train, "scaled:center"), scale = FALSE) # fit reduction (with method one of the methods to be "validated") time <- system.time(sdr <- method(X.train, c(y.train))) # reduce training data and fit a GLM if ("Deltas" %in% names(sdr)) { # the small deltas are `delta_j = Delta_j^-1 alpha_j` deltas <- Map(solve, sdr$Deltas, sdr$alphas) } else { deltas <- sdr$alphas } x.train <- mlm(X.train, Map(t, deltas), modes = 2:3) fit <- glm(y ~ x, family = binomial(link = "logit"), data = data.frame(y = y[-i], x = matrix(x.train, nrow(x.train)))) # predict from reduced test data x.test <- mlm(X.test, Map(t, deltas), modes = 2:3) y.pred <- predict(fit, data.frame(x = matrix(x.test, 1)), type = "response") loo.cv[i, "y_pred"] <- y.pred loo.cv[i, "elapsed"] <- time["elapsed"] loo.cv[i, "sys.self"] <- time["sys.self"] loo.cv[i, "user.self"] <- time["user.self"] } # accumulate LOO-CV results to previous results loo.cv$method <- factor(name) loo.cv$npc <- factor(sprintf("(%d, %d)", npc[1], npc[2])) sim <- rbind(sim, loo.cv) # Report partial sim done and one of the interesting measures cat(sprintf(" (Done) AUC: %f\n", with(loo.cv, auc(y_true, y_pred)))) # dump simulation (after each fold) to file saveRDS(sim, log.file) } } ################################################################################ ### Simulation Stats ### ################################################################################ # sim <- readRDS("eeg_sim_.rds") # sim <- readRDS("eeg_sim_20220524T2100.rds") # sim <- readRDS("eeg_sim_20220525T1700.rds") metrics <- list( # acc: Accuracy. P(Yhat = Y). Estimated as: (TP+TN)/(P+N). "Acc" = function(y_true, y_pred) mean(round(y_pred) == y_true), # err: Error rate. P(Yhat != Y). Estimated as: (FP+FN)/(P+N). "Err" = function(y_true, y_pred) mean(round(y_pred) != y_true), # fpr: False positive rate. P(Yhat = + | Y = -). aliases: Fallout. "FPR" = function(y_true, y_pred) mean((round(y_pred) == 1)[y_true == 0]), # tpr: True positive rate. P(Yhat = + | Y = +). aliases: Sensitivity, Recall. "TPR" = function(y_true, y_pred) mean((round(y_pred) == 1)[y_true == 1]), # fnr: False negative rate. P(Yhat = - | Y = +). aliases: Miss. "FNR" = function(y_true, y_pred) mean((round(y_pred) == 0)[y_true == 1]), # tnr: True negative rate. P(Yhat = - | Y = -). "TNR" = function(y_true, y_pred) mean((round(y_pred) == 0)[y_true == 0]), # auc: Area Under the Curve "AUC" = function(y_true, y_pred) pROC::roc(y_true, y_pred, quiet = TRUE)$auc[1], # auc.sd: Estimated standard error of the AUC estimate "sd(AUC)" = function(y_true, y_pred) sqrt(pROC::var(pROC::roc(y_true, y_pred, quiet = TRUE))) ) # Applies metrics on a group do.stats <- function(group) { stat <- Map(do.call, metrics, list(as.list(group[c("y_true", "y_pred")]))) data.frame(method = group$method[1], npc = group$npc[1], stat, check.names = FALSE) } # Call stats for each grouping stats <- do.call(rbind, Map(do.stats, split(sim, ~ method + npc, sep = " ", drop = TRUE))) rownames(stats) <- NULL print(stats, digits = 2) # and execution time stats times <- aggregate(cbind(elapsed, sys.self, user.self) ~ method + npc, sim, median) print(times, digits = 2) ## stats: 2022.05.24 # method npc Acc Err FPR TPR FNR TNR AUC sd(AUC) # 1 kpir.base (3, 4) 0.70 0.30 0.60 0.87 0.13 0.40 0.75 0.047 # 2 kpir.new.vlp (3, 4) 0.70 0.30 0.60 0.87 0.13 0.40 0.75 0.047 # 3 kpir.new.ls (3, 4) 0.74 0.26 0.51 0.88 0.12 0.49 0.77 0.045 # 4 kpir.ls (3, 4) 0.75 0.25 0.49 0.88 0.12 0.51 0.78 0.044 # (*) kpir.ls (3, 4) 0.78 0.22 0.38 0.87 0.13 0.62 0.86 0.034 # 5 kpir.momentum.vlp (3, 4) 0.70 0.30 0.60 0.87 0.13 0.40 0.75 0.047 # 6 kpir.momentum.ls (3, 4) 0.70 0.30 0.58 0.87 0.13 0.42 0.76 0.046 # 7 kpir.approx.vlp (3, 4) 0.68 0.32 0.62 0.86 0.14 0.38 0.74 0.048 # 8 kpir.approx.ls (3, 4) 0.73 0.27 0.53 0.88 0.12 0.47 0.78 0.044 # 9 kpir.ls (15, 15) 0.75 0.25 0.47 0.87 0.13 0.53 0.78 0.044 # (*) kpir.ls (15, 15) 0.76 0.24 0.44 0.88 0.12 0.56 0.83 0.039 # 10 kpir.approx.ls (15, 15) 0.73 0.27 0.51 0.87 0.13 0.49 0.78 0.044 # 11 kpir.ls (20, 30) 0.75 0.25 0.47 0.87 0.13 0.53 0.78 0.044 # (*) kpir.ls (20, 30) 0.77 0.23 0.36 0.84 0.16 0.64 0.79 0.045 # 12 kpir.approx.ls (20, 30) 0.63 0.37 1.00 1.00 0.00 0.00 0.51 0.053 # 13 kpir.ls (256, 64) 0.75 0.25 0.44 0.87 0.13 0.56 0.78 0.044 # (*) kpir.ls (256, 64) 0.68 0.32 0.51 0.79 0.21 0.49 0.66 0.054 # 14 kpir.approx.ls (256, 64) 0.75 0.25 0.44 0.87 0.13 0.56 0.78 0.044 # # (*) Using reduction matrices `Map(solve, sdr$Deltas, sdr$alphas)` instead # of only `sdr$alpha`. ## times: 2022.05.24 # method npc elapsed sys.self user.self # 1 kpir.base (3, 4) 0.079 0.402 0.220 # 2 kpir.new.vlp (3, 4) 0.075 0.393 0.217 # 3 kpir.new.ls (3, 4) 0.218 0.243 0.305 # 4 kpir.ls (3, 4) 0.003 0.006 0.006 # 5 kpir.momentum.vlp (3, 4) 0.143 0.595 0.359 # 6 kpir.momentum.ls (3, 4) 0.297 0.252 0.385 # 7 kpir.approx.vlp (3, 4) 0.044 0.240 0.152 # 8 kpir.approx.ls (3, 4) 0.066 0.144 0.121 # 9 kpir.ls (15, 15) 0.012 0.059 0.034 # 10 kpir.approx.ls (15, 15) 0.813 3.911 2.325 # 11 kpir.ls (20, 30) 0.028 0.129 0.080 # 12 kpir.approx.ls (20, 30) 2.110 10.111 6.290 # 13 kpir.ls (256, 64) 1.252 6.215 3.681 # 14 kpir.approx.ls (256, 64) 36.754 141.028 147.490