2022-05-24 19:07:40 +00:00
|
|
|
library(tensorPredictors)
|
|
|
|
suppressPackageStartupMessages({
|
|
|
|
library(ggplot2)
|
|
|
|
})
|
|
|
|
|
|
|
|
################################################################################
|
|
|
|
### Loading EEG Data ###
|
|
|
|
################################################################################
|
|
|
|
|
|
|
|
# Load as 3D predictors `X` and flat response `y`
|
|
|
|
c(X, y) %<-% local({
|
|
|
|
# Load from file
|
|
|
|
ds <- readRDS("eeg_data.rds")
|
|
|
|
|
|
|
|
# Dimension values
|
|
|
|
n <- nrow(ds) # sample size (nr. of people)
|
|
|
|
p <- 64L # nr. of predictors (count of sensorce)
|
|
|
|
t <- 256L # nr. of time points (measurements)
|
|
|
|
|
|
|
|
# Extract dimension names
|
|
|
|
nNames <- ds$PersonID
|
|
|
|
tNames <- as.character(seq(t))
|
|
|
|
pNames <- unlist(strsplit(colnames(ds)[2 + t * seq(p)], "_"))[c(TRUE, FALSE)]
|
|
|
|
|
|
|
|
# Split into predictors (with proper dims and names) and response
|
|
|
|
X <- array(as.matrix(ds[, -(1:2)]),
|
|
|
|
dim = c(person = n, time = t, sensor = p),
|
|
|
|
dimnames = list(person = nNames, time = tNames, sensor = pNames)
|
|
|
|
)
|
|
|
|
y <- ds$Case_Control
|
|
|
|
|
|
|
|
list(X, y)
|
|
|
|
})
|
|
|
|
|
|
|
|
################################################################################
|
|
|
|
### LOO-CV for Multiple Methods ###
|
|
|
|
################################################################################
|
|
|
|
|
|
|
|
# compatibility wrapper for function implemented with the "old" API
|
|
|
|
toNewAPI <- function(func) {
|
|
|
|
function(...) {
|
|
|
|
res <- func(...)
|
|
|
|
list(alphas = list(res$beta, res$alpha))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
# Number of (2D)^2 PCA components per axis
|
|
|
|
npcs <- list(c(3, 4), c(15, 15), c(20, 30), dim(X)[-1])
|
|
|
|
|
|
|
|
# setup methods for simulation (with unified API)
|
|
|
|
methods <- list(
|
2022-05-25 16:19:08 +00:00
|
|
|
hopca = list(
|
|
|
|
fun = function(X, Fy) list(alphas = hopca(X, npc = c(1L, 1L), 1L)),
|
|
|
|
is.applicable = function(npc) all(npc == c(256L, 64L)) # NOT reduced
|
|
|
|
),
|
2022-05-24 19:07:40 +00:00
|
|
|
kpir.base = list(
|
|
|
|
fun = toNewAPI(kpir.base),
|
|
|
|
is.applicable = function(npc) prod(npc) < 100
|
|
|
|
),
|
|
|
|
kpir.new.vlp = list(
|
|
|
|
fun = toNewAPI(function(X, Fy) kpir.new(X, Fy, init.method = "vlp")),
|
|
|
|
is.applicable = function(npc) prod(npc) < 100
|
|
|
|
),
|
|
|
|
kpir.new.ls = list(
|
|
|
|
fun = toNewAPI(function(X, Fy) kpir.new(X, Fy, init.method = "ls")),
|
|
|
|
is.applicable = function(npc) prod(npc) < 100
|
|
|
|
),
|
|
|
|
kpir.ls = list(
|
|
|
|
fun = kpir.ls,
|
|
|
|
is.applicable = function(npc) TRUE
|
|
|
|
),
|
2022-05-25 16:19:08 +00:00
|
|
|
LSIR = list(
|
|
|
|
fun = function(X, Fy) {
|
|
|
|
res <- LSIR(matrix(X, nrow(X)), Fy, dim(X)[2], dim(X)[3])
|
|
|
|
list(alphas = list(res$beta, res$alpha))
|
|
|
|
},
|
|
|
|
is.applicable = function(npc) prod(npc) < 1000
|
|
|
|
),
|
2022-05-24 19:07:40 +00:00
|
|
|
kpir.momentum.vlp = list(
|
|
|
|
fun = toNewAPI(function(X, Fy) kpir.momentum(X, Fy, init.method = "vlp")),
|
|
|
|
is.applicable = function(npc) prod(npc) < 100
|
|
|
|
),
|
|
|
|
kpir.momentum.ls = list(
|
|
|
|
fun = toNewAPI(function(X, Fy) kpir.momentum(X, Fy, init.method = "ls")),
|
|
|
|
is.applicable = function(npc) prod(npc) < 100
|
|
|
|
),
|
|
|
|
kpir.approx.vlp = list(
|
|
|
|
fun = toNewAPI(function(X, Fy) kpir.approx(X, Fy, init.method = "vlp")),
|
|
|
|
is.applicable = function(npc) prod(npc) < 100
|
|
|
|
),
|
|
|
|
kpir.approx.ls = list(
|
|
|
|
fun = toNewAPI(function(X, Fy) kpir.approx(X, Fy, init.method = "ls")),
|
|
|
|
is.applicable = function(npc) TRUE
|
|
|
|
)
|
|
|
|
)
|
|
|
|
|
|
|
|
# define AUC for reporting while simulation is running
|
|
|
|
auc <- function(y_true, y_pred) pROC::roc(y_true, y_pred, quiet = TRUE)$auc[1]
|
|
|
|
|
2022-05-25 16:19:08 +00:00
|
|
|
# file to dump simulation results
|
|
|
|
log.file <- format(Sys.time(), "eeg_sim_%Y%m%dT%H%M.rds")
|
|
|
|
|
2022-05-24 19:07:40 +00:00
|
|
|
# init complete simulation as empty
|
|
|
|
sim <- NULL
|
|
|
|
for (npc in npcs) {
|
|
|
|
# check if any PC count is smaller than the axis
|
|
|
|
if (any(npc < dim(X)[-1])) {
|
|
|
|
# Reduce dimensions using (2D)^2 PCA, which is a special case of the Higher
|
|
|
|
# Order Principal Component Analysis
|
|
|
|
pcs <- hopca(X, npc = npc, sample.axis = 1)
|
|
|
|
# Reduce dimensions
|
|
|
|
X.pc <- mlm(X, Map(t, pcs), modes = 2:3)
|
|
|
|
} else {
|
|
|
|
# No reduction
|
|
|
|
X.pc <- X
|
|
|
|
}
|
|
|
|
|
|
|
|
for (name in names(methods)) {
|
|
|
|
# check if method can be applied to current reduction dimensions
|
|
|
|
if (!methods[[name]]$is.applicable(npc)) {
|
|
|
|
next
|
|
|
|
}
|
|
|
|
|
|
|
|
# extract method to be applied
|
|
|
|
method <- methods[[name]]$fun
|
|
|
|
|
|
|
|
# report name of current simulation method
|
|
|
|
cat(sprintf("npc: (t = %d, p = %d), method: %s\n", npc[1], npc[2], name))
|
|
|
|
|
|
|
|
# Leave-One-Out Cross-Validation
|
|
|
|
loo.cv <- data.frame(
|
|
|
|
y_true = y, y_pred = NA, # CV responses
|
|
|
|
elapsed = NA, sys.self = NA, user.self = NA # execution time
|
|
|
|
)
|
|
|
|
for (i in seq_len(nrow(X.pc))) {
|
|
|
|
# report progress
|
|
|
|
cat(sprintf("\r%3d/%d", i, nrow(X.pc)))
|
|
|
|
|
|
|
|
# Split into training/test data
|
|
|
|
X.train <- X.pc[-i, , ]
|
|
|
|
y.train <- scale(y[-i], scale = FALSE)
|
|
|
|
X.test <- X.pc[i, , , drop = FALSE]
|
|
|
|
y.test <- scale(y[i], center = attr(y.train, "scaled:center"), scale = FALSE)
|
|
|
|
|
|
|
|
# fit reduction (with method one of the methods to be "validated")
|
|
|
|
time <- system.time(sdr <- method(X.train, c(y.train)))
|
|
|
|
|
|
|
|
# reduce training data and fit a GLM
|
2022-05-25 16:19:08 +00:00
|
|
|
if ("Deltas" %in% names(sdr)) {
|
|
|
|
# the small deltas are `delta_j = Delta_j^-1 alpha_j`
|
|
|
|
deltas <- Map(solve, sdr$Deltas, sdr$alphas)
|
|
|
|
} else {
|
|
|
|
deltas <- sdr$alphas
|
|
|
|
}
|
|
|
|
x.train <- mlm(X.train, Map(t, deltas), modes = 2:3)
|
2022-05-24 19:07:40 +00:00
|
|
|
fit <- glm(y ~ x, family = binomial(link = "logit"),
|
|
|
|
data = data.frame(y = y[-i], x = matrix(x.train, nrow(x.train))))
|
|
|
|
|
|
|
|
# predict from reduced test data
|
2022-05-25 16:19:08 +00:00
|
|
|
x.test <- mlm(X.test, Map(t, deltas), modes = 2:3)
|
2022-05-24 19:07:40 +00:00
|
|
|
y.pred <- predict(fit, data.frame(x = matrix(x.test, 1)), type = "response")
|
|
|
|
|
|
|
|
loo.cv[i, "y_pred"] <- y.pred
|
|
|
|
loo.cv[i, "elapsed"] <- time["elapsed"]
|
|
|
|
loo.cv[i, "sys.self"] <- time["sys.self"]
|
|
|
|
loo.cv[i, "user.self"] <- time["user.self"]
|
|
|
|
}
|
|
|
|
|
|
|
|
# accumulate LOO-CV results to previous results
|
|
|
|
loo.cv$method <- factor(name)
|
|
|
|
loo.cv$npc <- factor(sprintf("(%d, %d)", npc[1], npc[2]))
|
|
|
|
sim <- rbind(sim, loo.cv)
|
|
|
|
|
|
|
|
# Report partial sim done and one of the interesting measures
|
|
|
|
cat(sprintf(" (Done) AUC: %f\n", with(loo.cv, auc(y_true, y_pred))))
|
|
|
|
|
|
|
|
# dump simulation (after each fold) to file
|
2022-05-25 16:19:08 +00:00
|
|
|
saveRDS(sim, log.file)
|
2022-05-24 19:07:40 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
################################################################################
|
|
|
|
### Simulation Stats ###
|
|
|
|
################################################################################
|
2022-05-25 16:19:08 +00:00
|
|
|
# sim <- readRDS("eeg_sim_<date-time>.rds")
|
|
|
|
# sim <- readRDS("eeg_sim_20220524T2100.rds")
|
|
|
|
# sim <- readRDS("eeg_sim_20220525T1700.rds")
|
2022-05-24 19:07:40 +00:00
|
|
|
|
|
|
|
metrics <- list(
|
|
|
|
# acc: Accuracy. P(Yhat = Y). Estimated as: (TP+TN)/(P+N).
|
|
|
|
"Acc" = function(y_true, y_pred) mean(round(y_pred) == y_true),
|
|
|
|
# err: Error rate. P(Yhat != Y). Estimated as: (FP+FN)/(P+N).
|
|
|
|
"Err" = function(y_true, y_pred) mean(round(y_pred) != y_true),
|
|
|
|
# fpr: False positive rate. P(Yhat = + | Y = -). aliases: Fallout.
|
|
|
|
"FPR" = function(y_true, y_pred) mean((round(y_pred) == 1)[y_true == 0]),
|
|
|
|
# tpr: True positive rate. P(Yhat = + | Y = +). aliases: Sensitivity, Recall.
|
|
|
|
"TPR" = function(y_true, y_pred) mean((round(y_pred) == 1)[y_true == 1]),
|
|
|
|
# fnr: False negative rate. P(Yhat = - | Y = +). aliases: Miss.
|
|
|
|
"FNR" = function(y_true, y_pred) mean((round(y_pred) == 0)[y_true == 1]),
|
|
|
|
# tnr: True negative rate. P(Yhat = - | Y = -).
|
|
|
|
"TNR" = function(y_true, y_pred) mean((round(y_pred) == 0)[y_true == 0]),
|
|
|
|
# auc: Area Under the Curve
|
|
|
|
"AUC" = function(y_true, y_pred) pROC::roc(y_true, y_pred, quiet = TRUE)$auc[1],
|
|
|
|
# auc.sd: Estimated standard error of the AUC estimate
|
|
|
|
"sd(AUC)" = function(y_true, y_pred)
|
|
|
|
sqrt(pROC::var(pROC::roc(y_true, y_pred, quiet = TRUE)))
|
|
|
|
)
|
|
|
|
|
|
|
|
# Applies metrics on a group
|
|
|
|
do.stats <- function(group) {
|
|
|
|
stat <- Map(do.call, metrics, list(as.list(group[c("y_true", "y_pred")])))
|
|
|
|
data.frame(method = group$method[1], npc = group$npc[1], stat, check.names = FALSE)
|
|
|
|
}
|
|
|
|
# Call stats for each grouping
|
2022-05-25 16:19:08 +00:00
|
|
|
stats <- do.call(rbind, Map(do.stats, split(sim, ~ method + npc, sep = " ", drop = TRUE)))
|
2022-05-24 19:07:40 +00:00
|
|
|
rownames(stats) <- NULL
|
|
|
|
|
|
|
|
print(stats, digits = 2)
|
|
|
|
|
|
|
|
# and execution time stats
|
|
|
|
times <- aggregate(cbind(elapsed, sys.self, user.self) ~ method + npc, sim, median)
|
|
|
|
|
|
|
|
print(times, digits = 2)
|
2022-05-25 16:19:08 +00:00
|
|
|
|
|
|
|
## stats: 2022.05.24
|
|
|
|
# method npc Acc Err FPR TPR FNR TNR AUC sd(AUC)
|
|
|
|
# 1 kpir.base (3, 4) 0.70 0.30 0.60 0.87 0.13 0.40 0.75 0.047
|
|
|
|
# 2 kpir.new.vlp (3, 4) 0.70 0.30 0.60 0.87 0.13 0.40 0.75 0.047
|
|
|
|
# 3 kpir.new.ls (3, 4) 0.74 0.26 0.51 0.88 0.12 0.49 0.77 0.045
|
|
|
|
# 4 kpir.ls (3, 4) 0.75 0.25 0.49 0.88 0.12 0.51 0.78 0.044
|
|
|
|
# (*) kpir.ls (3, 4) 0.78 0.22 0.38 0.87 0.13 0.62 0.86 0.034
|
|
|
|
# 5 kpir.momentum.vlp (3, 4) 0.70 0.30 0.60 0.87 0.13 0.40 0.75 0.047
|
|
|
|
# 6 kpir.momentum.ls (3, 4) 0.70 0.30 0.58 0.87 0.13 0.42 0.76 0.046
|
|
|
|
# 7 kpir.approx.vlp (3, 4) 0.68 0.32 0.62 0.86 0.14 0.38 0.74 0.048
|
|
|
|
# 8 kpir.approx.ls (3, 4) 0.73 0.27 0.53 0.88 0.12 0.47 0.78 0.044
|
|
|
|
# 9 kpir.ls (15, 15) 0.75 0.25 0.47 0.87 0.13 0.53 0.78 0.044
|
|
|
|
# (*) kpir.ls (15, 15) 0.76 0.24 0.44 0.88 0.12 0.56 0.83 0.039
|
|
|
|
# 10 kpir.approx.ls (15, 15) 0.73 0.27 0.51 0.87 0.13 0.49 0.78 0.044
|
|
|
|
# 11 kpir.ls (20, 30) 0.75 0.25 0.47 0.87 0.13 0.53 0.78 0.044
|
|
|
|
# (*) kpir.ls (20, 30) 0.77 0.23 0.36 0.84 0.16 0.64 0.79 0.045
|
|
|
|
# 12 kpir.approx.ls (20, 30) 0.63 0.37 1.00 1.00 0.00 0.00 0.51 0.053
|
|
|
|
# 13 kpir.ls (256, 64) 0.75 0.25 0.44 0.87 0.13 0.56 0.78 0.044
|
|
|
|
# (*) kpir.ls (256, 64) 0.68 0.32 0.51 0.79 0.21 0.49 0.66 0.054
|
|
|
|
# 14 kpir.approx.ls (256, 64) 0.75 0.25 0.44 0.87 0.13 0.56 0.78 0.044
|
|
|
|
#
|
|
|
|
# (*) Using reduction matrices `Map(solve, sdr$Deltas, sdr$alphas)` instead
|
|
|
|
# of only `sdr$alpha`.
|
|
|
|
|
|
|
|
## times: 2022.05.24
|
|
|
|
# method npc elapsed sys.self user.self
|
|
|
|
# 1 kpir.base (3, 4) 0.079 0.402 0.220
|
|
|
|
# 2 kpir.new.vlp (3, 4) 0.075 0.393 0.217
|
|
|
|
# 3 kpir.new.ls (3, 4) 0.218 0.243 0.305
|
|
|
|
# 4 kpir.ls (3, 4) 0.003 0.006 0.006
|
|
|
|
# 5 kpir.momentum.vlp (3, 4) 0.143 0.595 0.359
|
|
|
|
# 6 kpir.momentum.ls (3, 4) 0.297 0.252 0.385
|
|
|
|
# 7 kpir.approx.vlp (3, 4) 0.044 0.240 0.152
|
|
|
|
# 8 kpir.approx.ls (3, 4) 0.066 0.144 0.121
|
|
|
|
# 9 kpir.ls (15, 15) 0.012 0.059 0.034
|
|
|
|
# 10 kpir.approx.ls (15, 15) 0.813 3.911 2.325
|
|
|
|
# 11 kpir.ls (20, 30) 0.028 0.129 0.080
|
|
|
|
# 12 kpir.approx.ls (20, 30) 2.110 10.111 6.290
|
|
|
|
# 13 kpir.ls (256, 64) 1.252 6.215 3.681
|
|
|
|
# 14 kpir.approx.ls (256, 64) 36.754 141.028 147.490
|
|
|
|
|
|
|
|
|