library(tensorPredictors) suppressPackageStartupMessages({ library(ggplot2) }) ################################################################################ ### Loading EEG Data ### ################################################################################ # Load as 3D predictors `X` and flat response `y` c(X, y) %<-% local({ # Load from file ds <- readRDS("eeg_data.rds") # Dimension values n <- nrow(ds) # sample size (nr. of people) p <- 64L # nr. of predictors (count of sensorce) t <- 256L # nr. of time points (measurements) # Extract dimension names nNames <- ds$PersonID tNames <- as.character(seq(t)) pNames <- unlist(strsplit(colnames(ds)[2 + t * seq(p)], "_"))[c(TRUE, FALSE)] # Split into predictors (with proper dims and names) and response X <- array(as.matrix(ds[, -(1:2)]), dim = c(person = n, time = t, sensor = p), dimnames = list(person = nNames, time = tNames, sensor = pNames) ) y <- ds$Case_Control list(X, y) }) ################################################################################ ### LOO-CV for Multiple Methods ### ################################################################################ # compatibility wrapper for function implemented with the "old" API toNewAPI <- function(func) { function(...) { res <- func(...) list(alphas = list(res$beta, res$alpha)) } } # Number of (2D)^2 PCA components per axis npcs <- list(c(3, 4), c(15, 15), c(20, 30), dim(X)[-1]) # setup methods for simulation (with unified API) methods <- list( kpir.base = list( fun = toNewAPI(kpir.base), is.applicable = function(npc) prod(npc) < 100 ), kpir.new.vlp = list( fun = toNewAPI(function(X, Fy) kpir.new(X, Fy, init.method = "vlp")), is.applicable = function(npc) prod(npc) < 100 ), kpir.new.ls = list( fun = toNewAPI(function(X, Fy) kpir.new(X, Fy, init.method = "ls")), is.applicable = function(npc) prod(npc) < 100 ), kpir.ls = list( fun = kpir.ls, is.applicable = function(npc) TRUE ), kpir.momentum.vlp = list( fun = toNewAPI(function(X, Fy) kpir.momentum(X, Fy, init.method = "vlp")), is.applicable = function(npc) prod(npc) < 100 ), kpir.momentum.ls = list( fun = toNewAPI(function(X, Fy) kpir.momentum(X, Fy, init.method = "ls")), is.applicable = function(npc) prod(npc) < 100 ), kpir.approx.vlp = list( fun = toNewAPI(function(X, Fy) kpir.approx(X, Fy, init.method = "vlp")), is.applicable = function(npc) prod(npc) < 100 ), kpir.approx.ls = list( fun = toNewAPI(function(X, Fy) kpir.approx(X, Fy, init.method = "ls")), is.applicable = function(npc) TRUE ) ) # define AUC for reporting while simulation is running auc <- function(y_true, y_pred) pROC::roc(y_true, y_pred, quiet = TRUE)$auc[1] # init complete simulation as empty sim <- NULL for (npc in npcs) { # check if any PC count is smaller than the axis if (any(npc < dim(X)[-1])) { # Reduce dimensions using (2D)^2 PCA, which is a special case of the Higher # Order Principal Component Analysis pcs <- hopca(X, npc = npc, sample.axis = 1) # Reduce dimensions X.pc <- mlm(X, Map(t, pcs), modes = 2:3) } else { # No reduction X.pc <- X } for (name in names(methods)) { # check if method can be applied to current reduction dimensions if (!methods[[name]]$is.applicable(npc)) { next } # extract method to be applied method <- methods[[name]]$fun # report name of current simulation method cat(sprintf("npc: (t = %d, p = %d), method: %s\n", npc[1], npc[2], name)) # Leave-One-Out Cross-Validation loo.cv <- data.frame( y_true = y, y_pred = NA, # CV responses elapsed = NA, sys.self = NA, user.self = NA # execution time ) for (i in seq_len(nrow(X.pc))) { # report progress cat(sprintf("\r%3d/%d", i, nrow(X.pc))) # Split into training/test data X.train <- X.pc[-i, , ] y.train <- scale(y[-i], scale = FALSE) X.test <- X.pc[i, , , drop = FALSE] y.test <- scale(y[i], center = attr(y.train, "scaled:center"), scale = FALSE) # fit reduction (with method one of the methods to be "validated") time <- system.time(sdr <- method(X.train, c(y.train))) # reduce training data and fit a GLM x.train <- mlm(X.train, Map(t, sdr$alphas), modes = 2:3) fit <- glm(y ~ x, family = binomial(link = "logit"), data = data.frame(y = y[-i], x = matrix(x.train, nrow(x.train)))) # predict from reduced test data x.test <- mlm(X.test, Map(t, sdr$alphas), modes = 2:3) y.pred <- predict(fit, data.frame(x = matrix(x.test, 1)), type = "response") loo.cv[i, "y_pred"] <- y.pred loo.cv[i, "elapsed"] <- time["elapsed"] loo.cv[i, "sys.self"] <- time["sys.self"] loo.cv[i, "user.self"] <- time["user.self"] } # accumulate LOO-CV results to previous results loo.cv$method <- factor(name) loo.cv$npc <- factor(sprintf("(%d, %d)", npc[1], npc[2])) sim <- rbind(sim, loo.cv) # Report partial sim done and one of the interesting measures cat(sprintf(" (Done) AUC: %f\n", with(loo.cv, auc(y_true, y_pred)))) # dump simulation (after each fold) to file saveRDS(sim, "eeg_sim.rds") } } ################################################################################ ### Simulation Stats ### ################################################################################ # sim <- readRDS("eeg_sim.rds") metrics <- list( # acc: Accuracy. P(Yhat = Y). Estimated as: (TP+TN)/(P+N). "Acc" = function(y_true, y_pred) mean(round(y_pred) == y_true), # err: Error rate. P(Yhat != Y). Estimated as: (FP+FN)/(P+N). "Err" = function(y_true, y_pred) mean(round(y_pred) != y_true), # fpr: False positive rate. P(Yhat = + | Y = -). aliases: Fallout. "FPR" = function(y_true, y_pred) mean((round(y_pred) == 1)[y_true == 0]), # tpr: True positive rate. P(Yhat = + | Y = +). aliases: Sensitivity, Recall. "TPR" = function(y_true, y_pred) mean((round(y_pred) == 1)[y_true == 1]), # fnr: False negative rate. P(Yhat = - | Y = +). aliases: Miss. "FNR" = function(y_true, y_pred) mean((round(y_pred) == 0)[y_true == 1]), # tnr: True negative rate. P(Yhat = - | Y = -). "TNR" = function(y_true, y_pred) mean((round(y_pred) == 0)[y_true == 0]), # auc: Area Under the Curve "AUC" = function(y_true, y_pred) pROC::roc(y_true, y_pred, quiet = TRUE)$auc[1], # auc.sd: Estimated standard error of the AUC estimate "sd(AUC)" = function(y_true, y_pred) sqrt(pROC::var(pROC::roc(y_true, y_pred, quiet = TRUE))) ) # Applies metrics on a group do.stats <- function(group) { stat <- Map(do.call, metrics, list(as.list(group[c("y_true", "y_pred")]))) data.frame(method = group$method[1], npc = group$npc[1], stat, check.names = FALSE) } # Call stats for each grouping stats <- do.call(rbind, Map(do.stats, split(sim, ~ method + npc, sep = " "))) rownames(stats) <- NULL print(stats, digits = 2) # and execution time stats times <- aggregate(cbind(elapsed, sys.self, user.self) ~ method + npc, sim, median) print(times, digits = 2)