wip: interface standardization and fixing bugs
This commit is contained in:
parent
4bc10018e3
commit
e2e8d19a0a
|
@ -1,29 +1,6 @@
|
||||||
library(tensorPredictors)
|
library(tensorPredictors)
|
||||||
|
library(parallel)
|
||||||
# Load as 3D predictors `X` and flat response `y` and `F = y` with per person dim. 1 x 1
|
library(pROC)
|
||||||
c(X, F, y) %<-% local({
|
|
||||||
# Load from file
|
|
||||||
ds <- readRDS("eeg_data.rds")
|
|
||||||
|
|
||||||
# Dimension values
|
|
||||||
n <- nrow(ds) # sample size (nr. of people)
|
|
||||||
p <- 64L # nr. of predictors (count of sensorce)
|
|
||||||
t <- 256L # nr. of time points (measurements)
|
|
||||||
|
|
||||||
# Extract dimension names
|
|
||||||
nNames <- ds$PersonID
|
|
||||||
tNames <- as.character(seq(t))
|
|
||||||
pNames <- unlist(strsplit(colnames(ds)[2 + t * seq(p)], "_"))[c(TRUE, FALSE)]
|
|
||||||
|
|
||||||
# Split into predictors (with proper dims and names) and response
|
|
||||||
X <- array(as.matrix(ds[, -(1:2)]),
|
|
||||||
dim = c(person = n, time = t, sensor = p),
|
|
||||||
dimnames = list(person = nNames, time = tNames, sensor = pNames)
|
|
||||||
)
|
|
||||||
y <- ds$Case_Control
|
|
||||||
|
|
||||||
list(X, array(y, c(n, 1L, 1L)), y)
|
|
||||||
})
|
|
||||||
|
|
||||||
|
|
||||||
#' (2D)^2 PCA preprocessing
|
#' (2D)^2 PCA preprocessing
|
||||||
|
@ -32,14 +9,14 @@ c(X, F, y) %<-% local({
|
||||||
#' @param ppc Number of "p"redictor "p"rincipal "c"omponents.
|
#' @param ppc Number of "p"redictor "p"rincipal "c"omponents.
|
||||||
preprocess <- function(X, tpc, ppc) {
|
preprocess <- function(X, tpc, ppc) {
|
||||||
# Mode covariances (for predictor and time point modes)
|
# Mode covariances (for predictor and time point modes)
|
||||||
c(Sigma_t, Sigma_p) %<-% mcov(X, sample.axis = 1L)
|
c(Sigma_t, Sigma_p) %<-% mcov(X, sample.axis = 3L)
|
||||||
|
|
||||||
# "predictor" (sensor) and time point principal components
|
# "predictor" (sensor) and time point principal components
|
||||||
V_t <- svd(Sigma_t, tpc, 0L)$u
|
V_t <- svd(Sigma_t, tpc, 0L)$u
|
||||||
V_p <- svd(Sigma_p, ppc, 0L)$u
|
V_p <- svd(Sigma_p, ppc, 0L)$u
|
||||||
|
|
||||||
# reduce with mode wise PCs
|
# reduce with mode wise PCs
|
||||||
mlm(X, list(V_t, V_p), modes = 2:3, transposed = TRUE)
|
mlm(X, list(V_t, V_p), modes = 1:2, transposed = TRUE)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -57,57 +34,57 @@ fnr <- function(y.true, y.pred) mean((round(y.pred) == 0)[y.true == 1])
|
||||||
# tnr: True negative rate. P(Yhat = - | Y = -).
|
# tnr: True negative rate. P(Yhat = - | Y = -).
|
||||||
tnr <- function(y.true, y.pred) mean((round(y.pred) == 0)[y.true == 0])
|
tnr <- function(y.true, y.pred) mean((round(y.pred) == 0)[y.true == 0])
|
||||||
# auc: Area Under the Curve
|
# auc: Area Under the Curve
|
||||||
auc <- function(y.true, y.pred) as.numeric(pROC::roc(y.true, y.pred, quiet = TRUE)$auc)
|
auc <- function(y.true, y.pred) {
|
||||||
auc.sd <- function(y.true, y.pred) sqrt(pROC::var(pROC::roc(y.true, y.pred, quiet = TRUE)))
|
as.numeric(pROC::roc(y.true, y.pred, quiet = TRUE, direction = "<")$auc)
|
||||||
|
}
|
||||||
|
auc.sd <- function(y.true, y.pred) {
|
||||||
|
sqrt(pROC::var(pROC::roc(y.true, y.pred, quiet = TRUE, direction = "<")))
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Load 2D (S1 stimulus only) EEG dataset of all subjects
|
||||||
|
c(X, y) %<-% readRDS("eeg_data_2d.rds")
|
||||||
|
|
||||||
|
|
||||||
##################################### GMLM #####################################
|
##################################### GMLM #####################################
|
||||||
|
|
||||||
# fit a tensor normal model to the data sample axis 1 indexes persons)
|
|
||||||
fit.gmlm <- gmlm_tensor_normal(X, F, sample.axis = 1L)
|
|
||||||
|
|
||||||
# plot the fitted mode wise reductions (for time and sensor axis)
|
|
||||||
with(fit.gmlm, {
|
|
||||||
par.reset <- par(mfrow = c(2, 1))
|
|
||||||
plot(seq(0, 1, len = 256), betas[[1]], main = "Time", xlab = "Time [s]", ylab = expression(beta[1]))
|
|
||||||
plot(betas[[2]], main = "Sensors", xlab = "Sensor Index", ylab = expression(beta[2]))
|
|
||||||
par(par.reset)
|
|
||||||
})
|
|
||||||
|
|
||||||
#' Leave-one-out prediction using GMLM
|
#' Leave-one-out prediction using GMLM
|
||||||
#'
|
#'
|
||||||
#' @param X 3D EEG data (preprocessed or not)
|
#' @param X 3D EEG data (preprocessed or not)
|
||||||
#' @param F binary responce `y` as a 3D tensor, every obs. is a 1 x 1 matrix
|
#' @param F binary responce `y` as a 3D tensor, every obs. is a 1 x 1 matrix
|
||||||
loo.predict.gmlm <- function(X, F) {
|
loo.predict.gmlm <- function(X, y) {
|
||||||
unlist(parallel::mclapply(seq_len(dim(X)[1L]), function(i) {
|
unlist(parallel::mclapply(seq_along(y), function(i) {
|
||||||
# Fit with i'th observation removed
|
# Fit with i'th observation removed
|
||||||
fit <- gmlm_tensor_normal(X[-i, , ], F[-i, , , drop = FALSE], sample.axis = 1L)
|
fit <- gmlm_tensor_normal(X[ , , -i], as.integer(y[-i]), sample.axis = 3L)
|
||||||
|
|
||||||
# Reduce the entire data set
|
# Reduce the entire data set
|
||||||
r <- as.vector(mlm(X, fit$betas, modes = 2:3, transpose = TRUE))
|
r <- as.vector(mlm(X, fit$betas, modes = 1:2, transpose = TRUE))
|
||||||
# Fit a logit model on reduced data with i'th observation removed
|
# Fit a logit model on reduced data with i'th observation removed
|
||||||
logit <- glm(y ~ r, family = binomial(link = "logit"),
|
logit <- glm(y ~ r, family = binomial(link = "logit"),
|
||||||
data = data.frame(y = y[-i], r = r[-i])
|
data = data.frame(y = y[-i], r = r[-i])
|
||||||
)
|
)
|
||||||
# predict i'th response given i'th reduced observation
|
# predict i'th response given i'th reduced observation
|
||||||
y.hat <- predict(logit, newdata = data.frame(r = r[i]), type = "response")
|
y.hat <- predict(logit, newdata = data.frame(r = r[i]), type = "response")
|
||||||
|
|
||||||
# report progress
|
# report progress
|
||||||
cat(sprintf("dim: (%d, %d) - %3d/%d\n", dim(X)[2L], dim(X)[3L], i, dim(X)[1L]))
|
cat(sprintf("dim: (%d, %d) - %3d/%d\n",
|
||||||
|
dim(X)[1L], dim(X)[2L], i, length(y))
|
||||||
|
)
|
||||||
|
|
||||||
y.hat
|
y.hat
|
||||||
}, mc.cores = getOption("mc.cores", max(1L, parallel::detectCores() - 1L))))
|
}, mc.cores = getOption("mc.cores", max(1L, parallel::detectCores() - 1L))))
|
||||||
}
|
}
|
||||||
|
|
||||||
# perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction
|
# perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction
|
||||||
y.hat.3.4 <- loo.predict.gmlm(preprocess(X, 3, 4), F)
|
y.hat.3.4 <- loo.predict.gmlm(preprocess(X, 3, 4), y)
|
||||||
y.hat.15.15 <- loo.predict.gmlm(preprocess(X, 15, 15), F)
|
y.hat.15.15 <- loo.predict.gmlm(preprocess(X, 15, 15), y)
|
||||||
y.hat.20.30 <- loo.predict.gmlm(preprocess(X, 20, 30), F)
|
y.hat.20.30 <- loo.predict.gmlm(preprocess(X, 20, 30), y)
|
||||||
y.hat <- loo.predict.gmlm(X, F)
|
y.hat <- loo.predict.gmlm(X, y)
|
||||||
|
|
||||||
# classification performance measures table by leave-one-out cross-validation
|
# classification performance measures table by leave-one-out cross-validation
|
||||||
(loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat), 2, function(y.pred) {
|
(loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat), 2, function(y.pred) {
|
||||||
sapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"),
|
sapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"),
|
||||||
function(FUN) { match.fun(FUN)(y, y.pred) })
|
function(FUN) { match.fun(FUN)(as.integer(y) - 1L, y.pred) })
|
||||||
}))
|
}))
|
||||||
#> y.hat.3.4 y.hat.15.15 y.hat.20.30 y.hat
|
#> y.hat.3.4 y.hat.15.15 y.hat.20.30 y.hat
|
||||||
#> acc 0.79508197 0.78688525 0.78688525 0.78688525
|
#> acc 0.79508197 0.78688525 0.78688525 0.78688525
|
||||||
|
@ -126,21 +103,26 @@ y.hat <- loo.predict.gmlm(X, F)
|
||||||
#'
|
#'
|
||||||
#' @param X 3D EEG data (preprocessed or not)
|
#' @param X 3D EEG data (preprocessed or not)
|
||||||
#' @param y binary responce vector
|
#' @param y binary responce vector
|
||||||
loo.predict.tsir <- function(X, y) {
|
loo.predict.tsir <- function(X, y, cond.threshold = Inf) {
|
||||||
unlist(parallel::mclapply(seq_len(dim(X)[1L]), function(i) {
|
unlist(parallel::mclapply(seq_along(y), function(i) {
|
||||||
# Fit with i'th observation removed
|
# Fit with i'th observation removed
|
||||||
fit <- TSIR(X[-i, , ], y[-i], c(1L, 1L), sample.axis = 1L)
|
fit <- TSIR(X[ , , -i], y[-i], sample.axis = 3L,
|
||||||
|
cond.threshold = cond.threshold
|
||||||
|
)
|
||||||
|
|
||||||
# Reduce the entire data set
|
# Reduce the entire data set
|
||||||
r <- as.vector(mlm(X, fit, modes = 2:3, transpose = TRUE))
|
r <- as.vector(mlm(X, fit, modes = 1:2, transpose = TRUE))
|
||||||
# Fit a logit model on reduced data with i'th observation removed
|
# Fit a logit model on reduced data with i'th observation removed
|
||||||
logit <- glm(y ~ r, family = binomial(link = "logit"),
|
logit <- glm(y ~ r, family = binomial(link = "logit"),
|
||||||
data = data.frame(y = y[-i], r = r[-i])
|
data = data.frame(y = y[-i], r = r[-i])
|
||||||
)
|
)
|
||||||
# predict i'th response given i'th reduced observation
|
# predict i'th response given i'th reduced observation
|
||||||
y.hat <- predict(logit, newdata = data.frame(r = r[i]), type = "response")
|
y.hat <- predict(logit, newdata = data.frame(r = r[i]), type = "response")
|
||||||
|
|
||||||
# report progress
|
# report progress
|
||||||
cat(sprintf("dim: (%d, %d) - %3d/%d\n", dim(X)[2L], dim(X)[3L], i, dim(X)[1L]))
|
cat(sprintf("dim: (%d, %d) - %3d/%d\n",
|
||||||
|
dim(X)[1], dim(X)[2], i, length(y)
|
||||||
|
))
|
||||||
|
|
||||||
y.hat
|
y.hat
|
||||||
}, mc.cores = getOption("mc.cores", max(1L, parallel::detectCores() - 1L))))
|
}, mc.cores = getOption("mc.cores", max(1L, parallel::detectCores() - 1L))))
|
||||||
|
@ -155,7 +137,7 @@ y.hat <- loo.predict.tsir(X, y)
|
||||||
# classification performance measures table by leave-one-out cross-validation
|
# classification performance measures table by leave-one-out cross-validation
|
||||||
(loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat), 2, function(y.pred) {
|
(loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat), 2, function(y.pred) {
|
||||||
sapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"),
|
sapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"),
|
||||||
function(FUN) { match.fun(FUN)(y, y.pred) })
|
function(FUN) { match.fun(FUN)(as.integer(y) - 1L, y.pred) })
|
||||||
}))
|
}))
|
||||||
#> y.hat.3.4 y.hat.15.15 y.hat.20.30 y.hat
|
#> y.hat.3.4 y.hat.15.15 y.hat.20.30 y.hat
|
||||||
#> acc 0.79508197 0.78688525 0.7540984 0.67213115
|
#> acc 0.79508197 0.78688525 0.7540984 0.67213115
|
||||||
|
@ -166,3 +148,82 @@ y.hat <- loo.predict.tsir(X, y)
|
||||||
#> tnr 0.66666667 0.62222222 0.6222222 0.46666667
|
#> tnr 0.66666667 0.62222222 0.6222222 0.46666667
|
||||||
#> auc 0.84646465 0.83376623 0.8040404 0.68946609
|
#> auc 0.84646465 0.83376623 0.8040404 0.68946609
|
||||||
#> auc.sd 0.03596227 0.04092069 0.0446129 0.05196611
|
#> auc.sd 0.03596227 0.04092069 0.0446129 0.05196611
|
||||||
|
|
||||||
|
|
||||||
|
# perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction
|
||||||
|
# including mode-wise covariance regularization via condition threshold similar
|
||||||
|
# to the regularization employed by tensor-normal GMLM
|
||||||
|
y.hat.3.4 <- loo.predict.tsir(preprocess(X, 3, 4), y, cond.threshold = 25)
|
||||||
|
y.hat.15.15 <- loo.predict.tsir(preprocess(X, 15, 15), y, cond.threshold = 25)
|
||||||
|
y.hat.20.30 <- loo.predict.tsir(preprocess(X, 20, 30), y, cond.threshold = 25)
|
||||||
|
y.hat <- loo.predict.tsir(X, y, cond.threshold = 25)
|
||||||
|
|
||||||
|
# classification performance measures table by leave-one-out cross-validation
|
||||||
|
(loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat), 2, function(y.pred) {
|
||||||
|
sapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"),
|
||||||
|
function(FUN) { match.fun(FUN)(as.integer(y) - 1L, y.pred) })
|
||||||
|
}))
|
||||||
|
#> y.hat.3.4 y.hat.15.15 y.hat.20.30 y.hat
|
||||||
|
#> acc 0.79508197 0.78688525 0.78688525 0.78688525
|
||||||
|
#> err 0.20491803 0.21311475 0.21311475 0.21311475
|
||||||
|
#> fpr 0.33333333 0.40000000 0.40000000 0.40000000
|
||||||
|
#> tpr 0.87012987 0.89610390 0.89610390 0.89610390
|
||||||
|
#> fnr 0.12987013 0.10389610 0.10389610 0.10389610
|
||||||
|
#> tnr 0.66666667 0.60000000 0.60000000 0.60000000
|
||||||
|
#> auc 0.84646465 0.84329004 0.84444444 0.84386724
|
||||||
|
#> auc.sd 0.03596227 0.03666439 0.03636842 0.03650638
|
||||||
|
|
||||||
|
|
||||||
|
##################################### LSIR #####################################
|
||||||
|
|
||||||
|
#' Leave-one-out prediction using LSIR
|
||||||
|
#'
|
||||||
|
#' @param X 3D EEG data (preprocessed or not)
|
||||||
|
#' @param y binary responce vector
|
||||||
|
#' @param cond.threshold (approx) condition number threshold to apply
|
||||||
|
#' regularization to the mode-wise covariances `Cov(X_(j))`, a value of `Inf`
|
||||||
|
#' means "no regularization".
|
||||||
|
loo.predict.lsir <- function(X, y) {
|
||||||
|
unlist(parallel::mclapply(seq_along(y), function(i) {
|
||||||
|
# Fit with i'th observation removed
|
||||||
|
fit <- LSIR(X[ , , -i], y[-i], sample.axis = 3L)
|
||||||
|
|
||||||
|
# Reduce the entire data set
|
||||||
|
r <- as.vector(mlm(X, fit$betas, modes = 1:2, transpose = TRUE))
|
||||||
|
# Fit a logit model on reduced data with i'th observation removed
|
||||||
|
logit <- glm(y ~ r, family = binomial(link = "logit"),
|
||||||
|
data = data.frame(y = y[-i], r = r[-i])
|
||||||
|
)
|
||||||
|
# predict i'th response given i'th reduced observation
|
||||||
|
y.hat <- predict(logit, newdata = data.frame(r = r[i]), type = "response")
|
||||||
|
|
||||||
|
# report progress
|
||||||
|
cat(sprintf("dim: (%d, %d) - %3d/%d\n",
|
||||||
|
dim(X)[1], dim(X)[2], i, length(y)
|
||||||
|
))
|
||||||
|
|
||||||
|
y.hat
|
||||||
|
}, mc.cores = getOption("mc.cores", max(1L, parallel::detectCores() - 1L))))
|
||||||
|
}
|
||||||
|
|
||||||
|
# perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction
|
||||||
|
y.hat.3.4 <- loo.predict.lsir(preprocess(X, 3, 4), y)
|
||||||
|
y.hat.15.15 <- loo.predict.lsir(preprocess(X, 15, 15), y)
|
||||||
|
y.hat.20.30 <- loo.predict.lsir(preprocess(X, 20, 30), y)
|
||||||
|
y.hat <- loo.predict.lsir(X, y)
|
||||||
|
|
||||||
|
|
||||||
|
# classification performance measures table by leave-one-out cross-validation
|
||||||
|
(loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat), 2, function(y.pred) {
|
||||||
|
sapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"),
|
||||||
|
function(FUN) { match.fun(FUN)(as.integer(y) - 1L, y.pred) })
|
||||||
|
}))
|
||||||
|
#> y.hat.3.4 y.hat.15.15 y.hat.20.30 y.hat
|
||||||
|
#> acc 0.79508197 0.72131148 0.78688525 0.4918033
|
||||||
|
#> err 0.20491803 0.27868852 0.21311475 0.5081967
|
||||||
|
#> fpr 0.35555556 0.44444444 0.35555556 0.7333333
|
||||||
|
#> tpr 0.88311688 0.81818182 0.87012987 0.6233766
|
||||||
|
#> fnr 0.11688312 0.18181818 0.12987013 0.3766234
|
||||||
|
#> tnr 0.64444444 0.55555556 0.64444444 0.2666667
|
||||||
|
#> auc 0.84963925 0.81298701 0.83145743 0.3909091
|
||||||
|
#> auc.sd 0.03639394 0.03998711 0.03815816 0.0540805
|
||||||
|
|
|
@ -2,7 +2,7 @@ library(tensorPredictors)
|
||||||
library(parallel)
|
library(parallel)
|
||||||
library(pROC)
|
library(pROC)
|
||||||
|
|
||||||
#' Mode-Wise PCA preprocessing
|
#' Mode-Wise PCA preprocessing (generalized (2D)^2 PCA)
|
||||||
#'
|
#'
|
||||||
#' @param npc_time Number of Principal Components for time axis
|
#' @param npc_time Number of Principal Components for time axis
|
||||||
#' @param npc_sensor Number of Principal Components for sensor axis
|
#' @param npc_sensor Number of Principal Components for sensor axis
|
||||||
|
@ -35,24 +35,50 @@ fnr <- function(y.true, y.pred) mean((round(y.pred) == 0)[y.true == 1])
|
||||||
# tnr: True negative rate. P(Yhat = - | Y = -).
|
# tnr: True negative rate. P(Yhat = - | Y = -).
|
||||||
tnr <- function(y.true, y.pred) mean((round(y.pred) == 0)[y.true == 0])
|
tnr <- function(y.true, y.pred) mean((round(y.pred) == 0)[y.true == 0])
|
||||||
# auc: Area Under the Curve
|
# auc: Area Under the Curve
|
||||||
auc <- function(y.true, y.pred) as.numeric(pROC::roc(y.true, y.pred, quiet = TRUE)$auc)
|
auc <- function(y.true, y.pred) {
|
||||||
auc.sd <- function(y.true, y.pred) sqrt(pROC::var(pROC::roc(y.true, y.pred, quiet = TRUE)))
|
as.numeric(pROC::roc(y.true, y.pred, quiet = TRUE, direction = "<")$auc)
|
||||||
|
}
|
||||||
|
auc.sd <- function(y.true, y.pred) {
|
||||||
|
sqrt(pROC::var(pROC::roc(y.true, y.pred, quiet = TRUE, direction = "<")))
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
# Load full EEG dataset of all subjects
|
# # unified API for all reduction procedures
|
||||||
c(X, y) %<-% readRDS("eeg_data_3d.rds")
|
# GMLM <- list(
|
||||||
|
# fit = function(X, y) tensorPredictors::gmlm_tensor_normal(X, as.integer(y), sample.axis = 4L),
|
||||||
|
# reduce = function(X, fit) mlm(X, fit$betas, 1:3, TRUE),
|
||||||
|
# applicable = function(X) TRUE
|
||||||
|
# )
|
||||||
|
# TSIR <- list(
|
||||||
|
# fit = function(X, y) tensorPredictors::TSIR(X, y, c(1L, 1L, 1L), sample.axis = 4L),
|
||||||
|
# reduce = function(X, fit) mlm(X, fit, 1:3, TRUE),
|
||||||
|
# applicable = function(X) TRUE
|
||||||
|
# )
|
||||||
|
# KPIR_LS <- list(
|
||||||
|
# fit = function(X, y) {
|
||||||
|
# if (any(dim(X)[-4] > dim(X)[4])) {
|
||||||
|
# stop("Dimensions too big")
|
||||||
|
# }
|
||||||
|
# tensorPredictors::kpir.ls(X, as.integer(y), sample.axis = 4L)
|
||||||
|
# },
|
||||||
|
# reduce = function(X, fit) if (is.null(fit)) NA else mlm(X, fit$alphas, 1:3, TRUE),
|
||||||
|
# applicable = function(X) all(dim(X)[1:3] <= dim(X)[4])
|
||||||
|
# )
|
||||||
|
|
||||||
|
#' Leave-one-out prediction using TSIR
|
||||||
##################################### GMLM #####################################
|
|
||||||
|
|
||||||
#' Leave-one-out prediction using GMLM
|
|
||||||
#'
|
#'
|
||||||
|
#' @param method reduction method to be applied
|
||||||
#' @param X 3D EEG data (preprocessed or not)
|
#' @param X 3D EEG data (preprocessed or not)
|
||||||
#' @param y binary responce `y` as a 3D tensor, every obs. is a 1 x 1 matrix
|
#' @param y binary responce vector
|
||||||
loo.predict.gmlm <- function(X, y) {
|
#' @param ... additional arguments passed on to `method`
|
||||||
|
loo.predict <- function(method, X, y, ...) {
|
||||||
|
# get method function name as character string for logging
|
||||||
|
method.name <- as.character(substitute(method))
|
||||||
|
|
||||||
|
# Parallel Leave-One-Out prediction
|
||||||
unlist(parallel::mclapply(seq_along(y), function(i) {
|
unlist(parallel::mclapply(seq_along(y), function(i) {
|
||||||
# Fit with i'th observation removed
|
# Fit with i'th observation removed
|
||||||
fit <- gmlm_tensor_normal(X[ , , , -i], as.integer(y[-i]), sample.axis = 4L)
|
fit <- method(X[ , , , -i], y[-i], sample.axis = 4L, ...)
|
||||||
|
|
||||||
# Reduce the entire data set
|
# Reduce the entire data set
|
||||||
r <- as.vector(mlm(X, fit$betas, modes = 1:3, transpose = TRUE))
|
r <- as.vector(mlm(X, fit$betas, modes = 1:3, transpose = TRUE))
|
||||||
|
@ -64,8 +90,8 @@ loo.predict.gmlm <- function(X, y) {
|
||||||
y.hat <- predict(logit, newdata = data.frame(r = r[i]), type = "response")
|
y.hat <- predict(logit, newdata = data.frame(r = r[i]), type = "response")
|
||||||
|
|
||||||
# report progress
|
# report progress
|
||||||
cat(sprintf("dim: (%d, %d, %d) - %3d/%d\n",
|
cat(sprintf("%s - dim: (%d, %d, %d) - %3d/%d\n",
|
||||||
dim(X)[1], dim(X)[2], dim(X)[3], i, length(y)
|
method.name, dim(X)[1], dim(X)[2], dim(X)[3], i, length(y)
|
||||||
))
|
))
|
||||||
|
|
||||||
y.hat
|
y.hat
|
||||||
|
@ -73,65 +99,41 @@ loo.predict.gmlm <- function(X, y) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction
|
# Load full EEG dataset (3D tensor for each subject)
|
||||||
y.hat.3.4 <- loo.predict.gmlm(preprocess(X, 3, 4, 3), y)
|
c(X, y) %<-% readRDS("eeg_data_3d.rds")
|
||||||
y.hat.15.15 <- loo.predict.gmlm(preprocess(X, 15, 15, 3), y)
|
|
||||||
y.hat.20.30 <- loo.predict.gmlm(preprocess(X, 20, 30, 3), y)
|
|
||||||
y.hat <- loo.predict.gmlm(X, y)
|
|
||||||
|
|
||||||
|
|
||||||
|
##################################### GMLM #####################################
|
||||||
|
|
||||||
|
# perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction
|
||||||
|
y.hat.3.4 <- loo.predict(gmlm_tensor_normal, preprocess(X, 3, 4, 3), y)
|
||||||
|
y.hat.15.15 <- loo.predict(gmlm_tensor_normal, preprocess(X, 15, 15, 3), y)
|
||||||
|
y.hat.20.30 <- loo.predict(gmlm_tensor_normal, preprocess(X, 20, 30, 3), y)
|
||||||
|
y.hat <- loo.predict(gmlm_tensor_normal, X, y)
|
||||||
|
|
||||||
# classification performance measures table by leave-one-out cross-validation
|
# classification performance measures table by leave-one-out cross-validation
|
||||||
(loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat), 2, function(y.pred) {
|
(loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat), 2, function(y.pred) {
|
||||||
sapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"),
|
sapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"),
|
||||||
function(FUN) { match.fun(FUN)(as.integer(y) - 1L, y.pred) })
|
function(FUN) { match.fun(FUN)(as.integer(y) - 1L, y.pred) })
|
||||||
}))
|
}))
|
||||||
#> y.hat.3.4 y.hat.15.15 y.hat.20.30 y.hat
|
#> y.hat.3.4 y.hat.15.15 y.hat.20.30 y.hat
|
||||||
#> acc 0.83606557 0.80327869 0.80327869 0.80327869
|
#> acc 0.83606557 0.80327869 0.80327869 0.79508197
|
||||||
#> err 0.16393443 0.19672131 0.19672131 0.19672131
|
#> err 0.16393443 0.19672131 0.19672131 0.20491803
|
||||||
#> fpr 0.31111111 0.33333333 0.33333333 0.33333333
|
#> fpr 0.31111111 0.33333333 0.33333333 0.35555556
|
||||||
#> tpr 0.92207792 0.88311688 0.88311688 0.88311688
|
#> tpr 0.92207792 0.88311688 0.88311688 0.88311688
|
||||||
#> fnr 0.07792208 0.11688312 0.11688312 0.11688312
|
#> fnr 0.07792208 0.11688312 0.11688312 0.11688312
|
||||||
#> tnr 0.68888889 0.66666667 0.66666667 0.66666667
|
#> tnr 0.68888889 0.66666667 0.66666667 0.64444444
|
||||||
#> auc 0.88023088 0.87070707 0.87041847 0.86810967
|
#> auc 0.88051948 0.86984127 0.86926407 0.86810967
|
||||||
#> auc.sd 0.03124875 0.03244623 0.03248653 0.03295883
|
#> auc.sd 0.03118211 0.03254642 0.03259186 0.03295883
|
||||||
|
|
||||||
|
|
||||||
################################## Tensor SIR ##################################
|
################################## Tensor SIR ##################################
|
||||||
|
|
||||||
#' Leave-one-out prediction using TSIR
|
|
||||||
#'
|
|
||||||
#' @param X 3D EEG data (preprocessed or not)
|
|
||||||
#' @param y binary responce vector
|
|
||||||
loo.predict.tsir <- function(X, y) {
|
|
||||||
unlist(parallel::mclapply(seq_along(y), function(i) {
|
|
||||||
# Fit with i'th observation removed
|
|
||||||
fit <- TSIR(X[ , , , -i], y[-i], c(1L, 1L, 1L), sample.axis = 4L)
|
|
||||||
|
|
||||||
# Reduce the entire data set
|
|
||||||
r <- as.vector(mlm(X, fit, modes = 1:3, transpose = TRUE))
|
|
||||||
# Fit a logit model on reduced data with i'th observation removed
|
|
||||||
logit <- glm(y ~ r, family = binomial(link = "logit"),
|
|
||||||
data = data.frame(y = y[-i], r = r[-i])
|
|
||||||
)
|
|
||||||
# predict i'th response given i'th reduced observation
|
|
||||||
y.hat <- predict(logit, newdata = data.frame(r = r[i]), type = "response")
|
|
||||||
|
|
||||||
# report progress
|
|
||||||
cat(sprintf("dim: (%d, %d, %d) - %3d/%d\n",
|
|
||||||
dim(X)[1], dim(X)[2], dim(X)[3], i, length(y)
|
|
||||||
))
|
|
||||||
|
|
||||||
y.hat
|
|
||||||
}, mc.cores = getOption("mc.cores", max(1L, parallel::detectCores() - 1L))))
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
# perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction
|
# perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction
|
||||||
y.hat.3.4 <- loo.predict.tsir(preprocess(X, 3, 4, 3), y)
|
y.hat.3.4 <- loo.predict(TSIR, preprocess(X, 3, 4, 3), y)
|
||||||
y.hat.15.15 <- loo.predict.tsir(preprocess(X, 15, 15, 3), y)
|
y.hat.15.15 <- loo.predict(TSIR, preprocess(X, 15, 15, 3), y)
|
||||||
y.hat.20.30 <- loo.predict.tsir(preprocess(X, 20, 30, 3), y)
|
y.hat.20.30 <- loo.predict(TSIR, preprocess(X, 20, 30, 3), y)
|
||||||
y.hat <- loo.predict.tsir(X, y)
|
y.hat <- loo.predict(TSIR, X, y)
|
||||||
|
|
||||||
|
|
||||||
# classification performance measures table by leave-one-out cross-validation
|
# classification performance measures table by leave-one-out cross-validation
|
||||||
(loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat), 2, function(y.pred) {
|
(loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat), 2, function(y.pred) {
|
||||||
|
@ -147,3 +149,25 @@ y.hat <- loo.predict.tsir(X, y)
|
||||||
#> tnr 0.66666667 0.75555556 0.66666667 0.66666667
|
#> tnr 0.66666667 0.75555556 0.66666667 0.66666667
|
||||||
#> auc 0.86522367 0.89379509 0.88196248 0.85974026
|
#> auc 0.86522367 0.89379509 0.88196248 0.85974026
|
||||||
#> auc.sd 0.03357539 0.03055047 0.02986038 0.03367847
|
#> auc.sd 0.03357539 0.03055047 0.02986038 0.03367847
|
||||||
|
|
||||||
|
|
||||||
|
# perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction
|
||||||
|
y.hat.3.4 <- loo.predict(TSIR, preprocess(X, 3, 4, 3), y, cond.threshold = 25)
|
||||||
|
y.hat.15.15 <- loo.predict(TSIR, preprocess(X, 15, 15, 3), y, cond.threshold = 25)
|
||||||
|
y.hat.20.30 <- loo.predict(TSIR, preprocess(X, 20, 30, 3), y, cond.threshold = 25)
|
||||||
|
y.hat <- loo.predict(TSIR, X, y, cond.threshold = 25)
|
||||||
|
|
||||||
|
# classification performance measures table by leave-one-out cross-validation
|
||||||
|
(loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat), 2, function(y.pred) {
|
||||||
|
sapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"),
|
||||||
|
function(FUN) { match.fun(FUN)(as.integer(y) - 1L, y.pred) })
|
||||||
|
}))
|
||||||
|
#> y.hat.3.4 y.hat.15.15 y.hat.20.30 y.hat
|
||||||
|
#> acc 0.81967213 0.77049180 0.76229508 0.77049180
|
||||||
|
#> err 0.18032787 0.22950820 0.23770492 0.22950820
|
||||||
|
#> fpr 0.33333333 0.37777778 0.40000000 0.37777778
|
||||||
|
#> tpr 0.90909091 0.85714286 0.85714286 0.85714286
|
||||||
|
#> fnr 0.09090909 0.14285714 0.14285714 0.14285714
|
||||||
|
#> tnr 0.66666667 0.62222222 0.60000000 0.62222222
|
||||||
|
#> auc 0.86522367 0.84386724 0.84415584 0.84040404
|
||||||
|
#> auc.sd 0.03357539 0.03542706 0.03519592 0.03558135
|
||||||
|
|
|
@ -40,20 +40,20 @@ LSIR <- function(X, y,
|
||||||
modes <- seq_along(dim(X))[-1L]
|
modes <- seq_along(dim(X))[-1L]
|
||||||
n <- dim(X)[1L]
|
n <- dim(X)[1L]
|
||||||
|
|
||||||
sigmas <- lapply(seq_along(modes), function(i) {
|
Sigmas <- lapply(seq_along(modes), function(i) {
|
||||||
matrix(rowMeans(apply(X, modes[-i], cov)), dim(X)[modes[i]])
|
matrix(rowMeans(apply(X, modes[-i], cov)), dim(X)[modes[i]])
|
||||||
})
|
})
|
||||||
# Omega_i = Sigma_i^{-1 / 2}
|
# Omega_i = Sigma_i^{-1 / 2}
|
||||||
isqrt_sigmas <- Map(matpow, sigmas, -1 / 2)
|
isqrt_Sigmas <- Map(matpow, Sigmas, -1 / 2)
|
||||||
|
|
||||||
# Normalize observations
|
# Normalize observations
|
||||||
Z <- mlm(X - rep(colMeans(X), each = dim(X)[1L]), isqrt_sigmas, modes = modes)
|
Z <- mlm(X - rep(colMeans(X), each = dim(X)[1L]), isqrt_Sigmas, modes = modes)
|
||||||
|
|
||||||
# Estimate conditional covariances Omega = Cov(E[Z | Y])
|
# Estimate conditional covariances Omega = Cov(E[Z | Y])
|
||||||
slice.args <- c(
|
slice.args <- c(
|
||||||
list(Z), rep(alist(, )[1], length(dim(X))), list(drop = FALSE)
|
list(Z), rep(alist(, )[1], length(dim(X))), list(drop = FALSE)
|
||||||
)
|
)
|
||||||
omegas <- lapply(seq_along(modes), function(i) {
|
Omegas <- lapply(seq_along(modes), function(i) {
|
||||||
matrix(Reduce(`+`, lapply(levels(y), function(l) {
|
matrix(Reduce(`+`, lapply(levels(y), function(l) {
|
||||||
slice.args[[2]] <- y == l
|
slice.args[[2]] <- y == l
|
||||||
rowMeans(apply(do.call(`[`, slice.args), modes[-i], function(z) {
|
rowMeans(apply(do.call(`[`, slice.args), modes[-i], function(z) {
|
||||||
|
@ -65,7 +65,7 @@ LSIR <- function(X, y,
|
||||||
# Compute central subspace basis estimate
|
# Compute central subspace basis estimate
|
||||||
betas <- mapply(function(isqrt_sigma, omega, reduction_dim) {
|
betas <- mapply(function(isqrt_sigma, omega, reduction_dim) {
|
||||||
isqrt_sigma %*% La.svd(omega, reduction_dim, 0L)$u
|
isqrt_sigma %*% La.svd(omega, reduction_dim, 0L)$u
|
||||||
}, isqrt_sigmas, omegas, reduction.dims, SIMPLIFY = FALSE)
|
}, isqrt_Sigmas, Omegas, reduction.dims, SIMPLIFY = FALSE)
|
||||||
|
|
||||||
list(betas = betas)
|
list(betas = betas, Sigmas = Sigmas, Omegas = Omegas)
|
||||||
}
|
}
|
||||||
|
|
|
@ -135,5 +135,5 @@ TSIR <- function(X, y,
|
||||||
|
|
||||||
# reductions matrices `Omega_k^-1 Gamma_k` where there (reverse) kronecker
|
# reductions matrices `Omega_k^-1 Gamma_k` where there (reverse) kronecker
|
||||||
# product spans the central tensor subspace (CTS) estimate
|
# product spans the central tensor subspace (CTS) estimate
|
||||||
structure(Map(solve, Omegas, Gammas), mcov = Omegas, Gammas = Gammas)
|
list(betas = Map(solve, Omegas, Gammas), Omegas = Omegas, Gammas = Gammas)
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,7 +31,9 @@ rtensornorm <- function(n, mean, cov, sample.axis = 1L) {
|
||||||
}
|
}
|
||||||
|
|
||||||
# add mean (using recycling, observations on last mode)
|
# add mean (using recycling, observations on last mode)
|
||||||
X <- X + c(mean)
|
if (!missing(mean)) {
|
||||||
|
X <- X + c(mean)
|
||||||
|
}
|
||||||
|
|
||||||
# permute axis for indexing observations on sample mode (permute first axis
|
# permute axis for indexing observations on sample mode (permute first axis
|
||||||
# with sample mode axis)
|
# with sample mode axis)
|
||||||
|
|
Loading…
Reference in New Issue