94 lines
3.6 KiB
R
94 lines
3.6 KiB
R
options(keep.source = TRUE, keep.source.pkgs = TRUE)
|
|
|
|
library(tensorPredictors)
|
|
|
|
# Load as 3D predictors `X` and flat response `y`
|
|
c(X, F, y) %<-% local({
|
|
# Load from file
|
|
ds <- readRDS("eeg_data.rds")
|
|
|
|
# Dimension values
|
|
n <- nrow(ds) # sample size (nr. of people)
|
|
p <- 64L # nr. of predictors (count of sensorce)
|
|
t <- 256L # nr. of time points (measurements)
|
|
|
|
# Extract dimension names
|
|
nNames <- ds$PersonID
|
|
tNames <- as.character(seq(t))
|
|
pNames <- unlist(strsplit(colnames(ds)[2 + t * seq(p)], "_"))[c(TRUE, FALSE)]
|
|
|
|
# Split into predictors (with proper dims and names) and response
|
|
X <- array(as.matrix(ds[, -(1:2)]),
|
|
dim = c(person = n, time = t, sensor = p),
|
|
dimnames = list(person = nNames, time = tNames, sensor = pNames)
|
|
)
|
|
y <- ds$Case_Control
|
|
|
|
list(X, array(y, c(n, 1L, 1L)), y)
|
|
})
|
|
|
|
# fit a tensor normal model to the data sample axis 1 indexes persons)
|
|
fit.gmlm <- gmlm_tensor_normal(X, F, sample.axis = 1L)
|
|
|
|
# Performa a LOO prediction
|
|
y.hat <- sapply(seq_len(dim(X)[1L]), function(i) {
|
|
# Fit with i'th observation removes
|
|
fit <- gmlm_tensor_normal(X[-i, , ], F[-i, , , drop = FALSE], sample.axis = 1L)
|
|
|
|
# Reduce the entire data set
|
|
r <- as.vector(mlm(X, fit$betas, modes = 2:3, transpose = TRUE))
|
|
# Fit a logit model on reduced data with i'th observation removed
|
|
logit <- glm(y ~ r, family = binomial(link = "logit"),
|
|
data = data.frame(y = y[-i], r = r[-i])
|
|
)
|
|
# predict i'th response given i'th reduced observation
|
|
y.hat <- predict(logit, newdata = data.frame(r = r[i]), type = "response")
|
|
# report progress
|
|
cat(sprintf("%3d/%d\n", i, dim(X)[1L]))
|
|
|
|
y.hat
|
|
})
|
|
|
|
### Classification performance measures
|
|
# acc: Accuracy. P(Yhat = Y). Estimated as: (TP+TN)/(P+N).
|
|
(acc <- mean(round(y.hat) == y)) # 0.7868852
|
|
# err: Error rate. P(Yhat != Y). Estimated as: (FP+FN)/(P+N).
|
|
(err <- mean(round(y.hat) != y)) # 0.2131148
|
|
# fpr: False positive rate. P(Yhat = + | Y = -). aliases: Fallout.
|
|
(fpr <- mean((round(y.hat) == 1)[y == 0])) # 0.4
|
|
# tpr: True positive rate. P(Yhat = + | Y = +). aliases: Sensitivity, Recall.
|
|
(tpr <- mean((round(y.hat) == 1)[y == 1])) # 0.8961039
|
|
# fnr: False negative rate. P(Yhat = - | Y = +). aliases: Miss.
|
|
(fnr <- mean((round(y.hat) == 0)[y == 1])) # 0.1038961
|
|
# tnr: True negative rate. P(Yhat = - | Y = -).
|
|
(tnr <- mean((round(y.hat) == 0)[y == 0])) # 0.6
|
|
# auc: Area Under the Curve
|
|
(auc <- pROC::roc(y, y.hat, quiet = TRUE)$auc) # 0.838961
|
|
|
|
|
|
with(fit.gmlm, {
|
|
par.reset <- par(mfrow = c(2, 1))
|
|
plot(seq(0, 1, len = 256), betas[[1]], main = "Time", xlab = "Time [s]", ylab = expression(beta[1]))
|
|
plot(betas[[2]], main = "Sensors", xlab = "Sensor Index", ylab = expression(beta[2]))
|
|
par(par.reset)
|
|
})
|
|
|
|
|
|
# dimX <- c(4, 3, 5)
|
|
# Omegas <- Map(function(p) {
|
|
# O <- matrix(rnorm(p^2), p)
|
|
# O %*% t(O)
|
|
# }, dimX)
|
|
|
|
# # Numerically more stable version of `sum(log(mapply(det, Omegas)) / dimX)`
|
|
# # which is itself equivalent to `log(det(Omega)) / prod(nrow(Omega))` where
|
|
# # `Omega <- Reduce(kronecker, rev(Omegas))`.
|
|
|
|
# Omega <- Reduce(kronecker, rev(Omegas))
|
|
# log(det(Omega)) / prod(nrow(Omega))
|
|
|
|
# (det.Omega <- sum(log(mapply(det, Omegas)) / dimX))
|
|
# sum(mapply(function(Omega) {
|
|
# sum(log(eigen(Omega, TRUE, TRUE)$values))
|
|
# }, Omegas) / dimX)
|