### tensor_predictors/dataAnalysis/eeg/eeg.R

options(keep.source = TRUE, keep.source.pkgs = TRUE)
library(tensorPredictors)
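
# NOTE: `%<-%` below is a destructuring (multiple) assignment operator. It is
# assumed here to be exported by `tensorPredictors`; the `zeallot` package
# provides an operator with the same name and semantics otherwise.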
# Load as 3D predictors `X`, tensor-valued response `F` and flat response `y`
c(X, F, y) %<-% local({
    # Load from file
    ds <- readRDS("eeg_data.rds")

    # Dimension values
    n <- nrow(ds)   # sample size (nr. of people)
    p <- 64L        # nr. of predictors (count of sensors)
    t <- 256L       # nr. of time points (measurements)

    # Extract dimension names (column names have the form `<sensor>_<time>`;
    # splitting on `_` and keeping every other element via the recycled
    # `c(TRUE, FALSE)` index recovers the sensor names)
    nNames <- ds$PersonID
    tNames <- as.character(seq(t))
    pNames <- unlist(strsplit(colnames(ds)[2 + t * seq(p)], "_"))[c(TRUE, FALSE)]

    # Split into predictors (with proper dims and names) and response
    X <- array(as.matrix(ds[, -(1:2)]),
        dim = c(person = n, time = t, sensor = p),
        dimnames = list(person = nNames, time = tNames, sensor = pNames)
    )
    y <- ds$Case_Control

    list(X, array(y, c(n, 1L, 1L)), y)
})
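
# Sanity check of the loaded dimensions (an illustrative guard added here,
# not part of the original analysis): one entry per person, 256 time points,
# 64 sensors, and a scalar response tensor per observation.
stopifnot(all(dim(X) == c(length(y), 256L, 64L)), all(dim(F) == c(length(y), 1L, 1L)))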

# Fit a tensor normal model to the data (sample axis 1 indexes persons)
fit.gmlm <- gmlm_tensor_normal(X, F, sample.axis = 1L)
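# NOTE: only the `betas` component of the fit is used below; it is a list of
# per-mode coefficient matrices (here `betas[[1]]` is 256 x 1 for the time
# mode and `betas[[2]]` is 64 x 1 for the sensor mode, since `F` carries a
# single value per observation), so the multi-linear reduction below maps
# every observation to a single scalar score.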

# Perform a leave-one-out (LOO) prediction
y.hat <- sapply(seq_len(dim(X)[1L]), function(i) {
    # Fit with the i'th observation removed
    fit <- gmlm_tensor_normal(X[-i, , ], F[-i, , , drop = FALSE], sample.axis = 1L)
    # Reduce the entire data set
    r <- as.vector(mlm(X, fit$betas, modes = 2:3, transpose = TRUE))
    # Fit a logit model on the reduced data, with the i'th observation removed
    logit <- glm(y ~ r, family = binomial(link = "logit"),
        data = data.frame(y = y[-i], r = r[-i])
    )
    # Predict the i'th response given the i'th reduced observation
    y.hat <- predict(logit, newdata = data.frame(r = r[i]), type = "response")
    # Report progress
    cat(sprintf("%3d/%d\n", i, dim(X)[1L]))
    y.hat
})

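# Cross-tabulate the hard predictions (threshold 0.5 via `round`) against the
# observed classes; every measure computed below is a ratio of these four
# counts. (Illustrative addition, not part of the original script.)
print(table(predicted = round(y.hat), actual = y))
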
### Classification performance measures
# acc: Accuracy. P(Yhat = Y). Estimated as: (TP+TN)/(P+N).
(acc <- mean(round(y.hat) == y)) # 0.7868852
# err: Error rate. P(Yhat != Y). Estimated as: (FP+FN)/(P+N).
(err <- mean(round(y.hat) != y)) # 0.2131148
# fpr: False positive rate. P(Yhat = + | Y = -). aliases: Fallout.
(fpr <- mean((round(y.hat) == 1)[y == 0])) # 0.4
# tpr: True positive rate. P(Yhat = + | Y = +). aliases: Sensitivity, Recall.
(tpr <- mean((round(y.hat) == 1)[y == 1])) # 0.8961039
# fnr: False negative rate. P(Yhat = - | Y = +). aliases: Miss.
(fnr <- mean((round(y.hat) == 0)[y == 1])) # 0.1038961
# tnr: True negative rate. P(Yhat = - | Y = -).
(tnr <- mean((round(y.hat) == 0)[y == 0])) # 0.6
# auc: Area under the ROC curve.
(auc <- pROC::roc(y, y.hat, quiet = TRUE)$auc) # 0.838961
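
# Optional check (commented out; not part of the original script): plot the
# ROC curve underlying the AUC via pROC's plot method.
# plot(pROC::roc(y, y.hat, quiet = TRUE))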

# Plot the estimated time and sensor effects
with(fit.gmlm, {
    par.reset <- par(mfrow = c(2, 1))
    plot(seq(0, 1, len = 256), betas[[1]], main = "Time", xlab = "Time [s]", ylab = expression(beta[1]))
    plot(betas[[2]], main = "Sensors", xlab = "Sensor Index", ylab = expression(beta[2]))
    par(par.reset)
})
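# (The time axis maps the 256 samples onto [0, 1]; assuming the usual 256 Hz
# EEG sampling rate, this corresponds to the one-second recording window.
# The sampling rate is not stated in this script.)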

### Scratch: scaled log-determinant of a Kronecker product
# dimX <- c(4, 3, 5)
# Omegas <- Map(function(p) {
#     O <- matrix(rnorm(p^2), p)
#     O %*% t(O)
# }, dimX)
# # All three expressions below compute `log(det(Omega)) / nrow(Omega)` for
# # `Omega <- Reduce(kronecker, rev(Omegas))`; the sum-of-log-determinants
# # form is numerically more stable than the direct one, and the
# # eigenvalue-based form is the most stable.
# Omega <- Reduce(kronecker, rev(Omegas))
# log(det(Omega)) / prod(nrow(Omega))
# (det.Omega <- sum(log(mapply(det, Omegas)) / dimX))
# sum(mapply(function(Omega) {
#     sum(log(eigen(Omega, TRUE, TRUE)$values))
# }, Omegas) / dimX)
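# Why the identity holds: for square matrices A (m x m) and B (n x n),
# det(A %x% B) = det(A)^n * det(B)^m. Applied repeatedly,
#   log(det(Reduce(kronecker, rev(Omegas)))) ==
#       sum((prod(dimX) / dimX) * log(mapply(det, Omegas)))
# and dividing by `prod(dimX)` gives `sum(log(mapply(det, Omegas)) / dimX)`.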