tensor_predictors/simulations/eeg_sim.R

203 lines
7.6 KiB
R

library(tensorPredictors)
suppressPackageStartupMessages({
library(ggplot2)
})
################################################################################
### Loading EEG Data ###
################################################################################
# Load as 3D predictors `X` and flat response `y`
c(X, y) %<-% local({
# Load from file
ds <- readRDS("eeg_data.rds")
# Dimension values
n <- nrow(ds) # sample size (nr. of people)
p <- 64L # nr. of predictors (count of sensorce)
t <- 256L # nr. of time points (measurements)
# Extract dimension names
nNames <- ds$PersonID
tNames <- as.character(seq(t))
pNames <- unlist(strsplit(colnames(ds)[2 + t * seq(p)], "_"))[c(TRUE, FALSE)]
# Split into predictors (with proper dims and names) and response
X <- array(as.matrix(ds[, -(1:2)]),
dim = c(person = n, time = t, sensor = p),
dimnames = list(person = nNames, time = tNames, sensor = pNames)
)
y <- ds$Case_Control
list(X, y)
})
################################################################################
### LOO-CV for Multiple Methods ###
################################################################################
# compatibility wrapper for function implemented with the "old" API
toNewAPI <- function(func) {
function(...) {
res <- func(...)
list(alphas = list(res$beta, res$alpha))
}
}
# Number of (2D)^2 PCA components per axis
npcs <- list(c(3, 4), c(15, 15), c(20, 30), dim(X)[-1])
# setup methods for simulation (with unified API)
methods <- list(
kpir.base = list(
fun = toNewAPI(kpir.base),
is.applicable = function(npc) prod(npc) < 100
),
kpir.new.vlp = list(
fun = toNewAPI(function(X, Fy) kpir.new(X, Fy, init.method = "vlp")),
is.applicable = function(npc) prod(npc) < 100
),
kpir.new.ls = list(
fun = toNewAPI(function(X, Fy) kpir.new(X, Fy, init.method = "ls")),
is.applicable = function(npc) prod(npc) < 100
),
kpir.ls = list(
fun = kpir.ls,
is.applicable = function(npc) TRUE
),
kpir.momentum.vlp = list(
fun = toNewAPI(function(X, Fy) kpir.momentum(X, Fy, init.method = "vlp")),
is.applicable = function(npc) prod(npc) < 100
),
kpir.momentum.ls = list(
fun = toNewAPI(function(X, Fy) kpir.momentum(X, Fy, init.method = "ls")),
is.applicable = function(npc) prod(npc) < 100
),
kpir.approx.vlp = list(
fun = toNewAPI(function(X, Fy) kpir.approx(X, Fy, init.method = "vlp")),
is.applicable = function(npc) prod(npc) < 100
),
kpir.approx.ls = list(
fun = toNewAPI(function(X, Fy) kpir.approx(X, Fy, init.method = "ls")),
is.applicable = function(npc) TRUE
)
)
# define AUC for reporting while simulation is running
auc <- function(y_true, y_pred) pROC::roc(y_true, y_pred, quiet = TRUE)$auc[1]
# init complete simulation as empty
sim <- NULL
for (npc in npcs) {
# check if any PC count is smaller than the axis
if (any(npc < dim(X)[-1])) {
# Reduce dimensions using (2D)^2 PCA, which is a special case of the Higher
# Order Principal Component Analysis
pcs <- hopca(X, npc = npc, sample.axis = 1)
# Reduce dimensions
X.pc <- mlm(X, Map(t, pcs), modes = 2:3)
} else {
# No reduction
X.pc <- X
}
for (name in names(methods)) {
# check if method can be applied to current reduction dimensions
if (!methods[[name]]$is.applicable(npc)) {
next
}
# extract method to be applied
method <- methods[[name]]$fun
# report name of current simulation method
cat(sprintf("npc: (t = %d, p = %d), method: %s\n", npc[1], npc[2], name))
# Leave-One-Out Cross-Validation
loo.cv <- data.frame(
y_true = y, y_pred = NA, # CV responses
elapsed = NA, sys.self = NA, user.self = NA # execution time
)
for (i in seq_len(nrow(X.pc))) {
# report progress
cat(sprintf("\r%3d/%d", i, nrow(X.pc)))
# Split into training/test data
X.train <- X.pc[-i, , ]
y.train <- scale(y[-i], scale = FALSE)
X.test <- X.pc[i, , , drop = FALSE]
y.test <- scale(y[i], center = attr(y.train, "scaled:center"), scale = FALSE)
# fit reduction (with method one of the methods to be "validated")
time <- system.time(sdr <- method(X.train, c(y.train)))
# reduce training data and fit a GLM
x.train <- mlm(X.train, Map(t, sdr$alphas), modes = 2:3)
fit <- glm(y ~ x, family = binomial(link = "logit"),
data = data.frame(y = y[-i], x = matrix(x.train, nrow(x.train))))
# predict from reduced test data
x.test <- mlm(X.test, Map(t, sdr$alphas), modes = 2:3)
y.pred <- predict(fit, data.frame(x = matrix(x.test, 1)), type = "response")
loo.cv[i, "y_pred"] <- y.pred
loo.cv[i, "elapsed"] <- time["elapsed"]
loo.cv[i, "sys.self"] <- time["sys.self"]
loo.cv[i, "user.self"] <- time["user.self"]
}
# accumulate LOO-CV results to previous results
loo.cv$method <- factor(name)
loo.cv$npc <- factor(sprintf("(%d, %d)", npc[1], npc[2]))
sim <- rbind(sim, loo.cv)
# Report partial sim done and one of the interesting measures
cat(sprintf(" (Done) AUC: %f\n", with(loo.cv, auc(y_true, y_pred))))
# dump simulation (after each fold) to file
saveRDS(sim, "eeg_sim.rds")
}
}
################################################################################
### Simulation Stats ###
################################################################################
# sim <- readRDS("eeg_sim.rds")
metrics <- list(
# acc: Accuracy. P(Yhat = Y). Estimated as: (TP+TN)/(P+N).
"Acc" = function(y_true, y_pred) mean(round(y_pred) == y_true),
# err: Error rate. P(Yhat != Y). Estimated as: (FP+FN)/(P+N).
"Err" = function(y_true, y_pred) mean(round(y_pred) != y_true),
# fpr: False positive rate. P(Yhat = + | Y = -). aliases: Fallout.
"FPR" = function(y_true, y_pred) mean((round(y_pred) == 1)[y_true == 0]),
# tpr: True positive rate. P(Yhat = + | Y = +). aliases: Sensitivity, Recall.
"TPR" = function(y_true, y_pred) mean((round(y_pred) == 1)[y_true == 1]),
# fnr: False negative rate. P(Yhat = - | Y = +). aliases: Miss.
"FNR" = function(y_true, y_pred) mean((round(y_pred) == 0)[y_true == 1]),
# tnr: True negative rate. P(Yhat = - | Y = -).
"TNR" = function(y_true, y_pred) mean((round(y_pred) == 0)[y_true == 0]),
# auc: Area Under the Curve
"AUC" = function(y_true, y_pred) pROC::roc(y_true, y_pred, quiet = TRUE)$auc[1],
# auc.sd: Estimated standard error of the AUC estimate
"sd(AUC)" = function(y_true, y_pred)
sqrt(pROC::var(pROC::roc(y_true, y_pred, quiet = TRUE)))
)
# Applies metrics on a group
do.stats <- function(group) {
stat <- Map(do.call, metrics, list(as.list(group[c("y_true", "y_pred")])))
data.frame(method = group$method[1], npc = group$npc[1], stat, check.names = FALSE)
}
# Call stats for each grouping
stats <- do.call(rbind, Map(do.stats, split(sim, ~ method + npc, sep = " ")))
rownames(stats) <- NULL
print(stats, digits = 2)
# and execution time stats
times <- aggregate(cbind(elapsed, sys.self, user.self) ~ method + npc, sim, median)
print(times, digits = 2)