library(tensorPredictors) library(parallel) library(pROC) #' (2D)^2 PCA preprocessing #' #' @param tpc Number of "t"ime "p"rincipal "c"omponents. #' @param ppc Number of "p"redictor "p"rincipal "c"omponents. preprocess <- function(X, tpc, ppc) { # Mode covariances (for predictor and time point modes) c(Sigma_t, Sigma_p) %<-% mcov(X, sample.axis = 3L) # "predictor" (sensor) and time point principal components V_t <- svd(Sigma_t, tpc, 0L)$u V_p <- svd(Sigma_p, ppc, 0L)$u # reduce with mode wise PCs mlm(X, list(V_t, V_p), modes = 1:2, transposed = TRUE) } ### Classification performance measures # acc: Accuracy. P(Yhat = Y). Estimated as: (TP+TN)/(P+N). acc <- function(y.true, y.pred) mean(round(y.pred) == y.true) # err: Error rate. P(Yhat != Y). Estimated as: (FP+FN)/(P+N). err <- function(y.true, y.pred) mean(round(y.pred) != y.true) # fpr: False positive rate. P(Yhat = + | Y = -). aliases: Fallout. fpr <- function(y.true, y.pred) mean((round(y.pred) == 1)[y.true == 0]) # tpr: True positive rate. P(Yhat = + | Y = +). aliases: Sensitivity, Recall. tpr <- function(y.true, y.pred) mean((round(y.pred) == 1)[y.true == 1]) # fnr: False negative rate. P(Yhat = - | Y = +). aliases: Miss. fnr <- function(y.true, y.pred) mean((round(y.pred) == 0)[y.true == 1]) # tnr: True negative rate. P(Yhat = - | Y = -). tnr <- function(y.true, y.pred) mean((round(y.pred) == 0)[y.true == 0]) # auc: Area Under the Curve auc <- function(y.true, y.pred) { as.numeric(pROC::roc(y.true, y.pred, quiet = TRUE, direction = "<")$auc) } auc.sd <- function(y.true, y.pred) { sqrt(pROC::var(pROC::roc(y.true, y.pred, quiet = TRUE, direction = "<"))) } # Load 2D (S1 stimulus only) EEG dataset of all subjects c(X, y) %<-% readRDS("eeg_data_2d.rds") ##################################### GMLM ##################################### #' Leave-one-out prediction using GMLM #' #' @param X 3D EEG data (preprocessed or not) #' @param F binary responce `y` as a 3D tensor, every obs. is a 1 x 1 matrix loo.predict.gmlm <- function(X, y) { unlist(parallel::mclapply(seq_along(y), function(i) { # Fit with i'th observation removed fit <- gmlm_tensor_normal(X[ , , -i], as.integer(y[-i]), sample.axis = 3L) # Reduce the entire data set r <- as.vector(mlm(X, fit$betas, modes = 1:2, transpose = TRUE)) # Fit a logit model on reduced data with i'th observation removed logit <- glm(y ~ r, family = binomial(link = "logit"), data = data.frame(y = y[-i], r = r[-i]) ) # predict i'th response given i'th reduced observation y.hat <- predict(logit, newdata = data.frame(r = r[i]), type = "response") # report progress cat(sprintf("dim: (%d, %d) - %3d/%d\n", dim(X)[1L], dim(X)[2L], i, length(y)) ) y.hat }, mc.cores = getOption("mc.cores", max(1L, parallel::detectCores() - 1L)))) } # perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction y.hat.3.4 <- loo.predict.gmlm(preprocess(X, 3, 4), y) y.hat.15.15 <- loo.predict.gmlm(preprocess(X, 15, 15), y) y.hat.20.30 <- loo.predict.gmlm(preprocess(X, 20, 30), y) y.hat <- loo.predict.gmlm(X, y) # classification performance measures table by leave-one-out cross-validation (loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat), 2, function(y.pred) { sapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"), function(FUN) { match.fun(FUN)(as.integer(y) - 1L, y.pred) }) })) #> y.hat.3.4 y.hat.15.15 y.hat.20.30 y.hat #> acc 0.79508197 0.78688525 0.78688525 0.78688525 #> err 0.20491803 0.21311475 0.21311475 0.21311475 #> fpr 0.35555556 0.40000000 0.40000000 0.40000000 #> tpr 0.88311688 0.89610390 0.89610390 0.89610390 #> fnr 0.11688312 0.10389610 0.10389610 0.10389610 #> tnr 0.64444444 0.60000000 0.60000000 0.60000000 #> auc 0.85108225 0.83838384 0.83924964 0.83896104 #> auc.sd 0.03584791 0.03760531 0.03751307 0.03754553 ################################## Tensor SIR ################################## #' Leave-one-out prediction using TSIR #' #' @param X 3D EEG data (preprocessed or not) #' @param y binary responce vector loo.predict.tsir <- function(X, y, cond.threshold = Inf) { unlist(parallel::mclapply(seq_along(y), function(i) { # Fit with i'th observation removed fit <- TSIR(X[ , , -i], y[-i], sample.axis = 3L, cond.threshold = cond.threshold ) # Reduce the entire data set r <- as.vector(mlm(X, fit, modes = 1:2, transpose = TRUE)) # Fit a logit model on reduced data with i'th observation removed logit <- glm(y ~ r, family = binomial(link = "logit"), data = data.frame(y = y[-i], r = r[-i]) ) # predict i'th response given i'th reduced observation y.hat <- predict(logit, newdata = data.frame(r = r[i]), type = "response") # report progress cat(sprintf("dim: (%d, %d) - %3d/%d\n", dim(X)[1], dim(X)[2], i, length(y) )) y.hat }, mc.cores = getOption("mc.cores", max(1L, parallel::detectCores() - 1L)))) } # perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction y.hat.3.4 <- loo.predict.tsir(preprocess(X, 3, 4), y) y.hat.15.15 <- loo.predict.tsir(preprocess(X, 15, 15), y) y.hat.20.30 <- loo.predict.tsir(preprocess(X, 20, 30), y) y.hat <- loo.predict.tsir(X, y) # classification performance measures table by leave-one-out cross-validation (loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat), 2, function(y.pred) { sapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"), function(FUN) { match.fun(FUN)(as.integer(y) - 1L, y.pred) }) })) #> y.hat.3.4 y.hat.15.15 y.hat.20.30 y.hat #> acc 0.79508197 0.78688525 0.7540984 0.67213115 #> err 0.20491803 0.21311475 0.2459016 0.32786885 #> fpr 0.33333333 0.37777778 0.3777778 0.53333333 #> tpr 0.87012987 0.88311688 0.8311688 0.79220779 #> fnr 0.12987013 0.11688312 0.1688312 0.20779221 #> tnr 0.66666667 0.62222222 0.6222222 0.46666667 #> auc 0.84646465 0.83376623 0.8040404 0.68946609 #> auc.sd 0.03596227 0.04092069 0.0446129 0.05196611 # perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction # including mode-wise covariance regularization via condition threshold similar # to the regularization employed by tensor-normal GMLM y.hat.3.4 <- loo.predict.tsir(preprocess(X, 3, 4), y, cond.threshold = 25) y.hat.15.15 <- loo.predict.tsir(preprocess(X, 15, 15), y, cond.threshold = 25) y.hat.20.30 <- loo.predict.tsir(preprocess(X, 20, 30), y, cond.threshold = 25) y.hat <- loo.predict.tsir(X, y, cond.threshold = 25) # classification performance measures table by leave-one-out cross-validation (loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat), 2, function(y.pred) { sapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"), function(FUN) { match.fun(FUN)(as.integer(y) - 1L, y.pred) }) })) #> y.hat.3.4 y.hat.15.15 y.hat.20.30 y.hat #> acc 0.79508197 0.78688525 0.78688525 0.78688525 #> err 0.20491803 0.21311475 0.21311475 0.21311475 #> fpr 0.33333333 0.40000000 0.40000000 0.40000000 #> tpr 0.87012987 0.89610390 0.89610390 0.89610390 #> fnr 0.12987013 0.10389610 0.10389610 0.10389610 #> tnr 0.66666667 0.60000000 0.60000000 0.60000000 #> auc 0.84646465 0.84329004 0.84444444 0.84386724 #> auc.sd 0.03596227 0.03666439 0.03636842 0.03650638 ##################################### LSIR ##################################### #' Leave-one-out prediction using LSIR #' #' @param X 3D EEG data (preprocessed or not) #' @param y binary responce vector #' @param cond.threshold (approx) condition number threshold to apply #' regularization to the mode-wise covariances `Cov(X_(j))`, a value of `Inf` #' means "no regularization". loo.predict.lsir <- function(X, y) { unlist(parallel::mclapply(seq_along(y), function(i) { # Fit with i'th observation removed fit <- LSIR(X[ , , -i], y[-i], sample.axis = 3L) # Reduce the entire data set r <- as.vector(mlm(X, fit$betas, modes = 1:2, transpose = TRUE)) # Fit a logit model on reduced data with i'th observation removed logit <- glm(y ~ r, family = binomial(link = "logit"), data = data.frame(y = y[-i], r = r[-i]) ) # predict i'th response given i'th reduced observation y.hat <- predict(logit, newdata = data.frame(r = r[i]), type = "response") # report progress cat(sprintf("dim: (%d, %d) - %3d/%d\n", dim(X)[1], dim(X)[2], i, length(y) )) y.hat }, mc.cores = getOption("mc.cores", max(1L, parallel::detectCores() - 1L)))) } # perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction y.hat.3.4 <- loo.predict.lsir(preprocess(X, 3, 4), y) y.hat.15.15 <- loo.predict.lsir(preprocess(X, 15, 15), y) y.hat.20.30 <- loo.predict.lsir(preprocess(X, 20, 30), y) y.hat <- loo.predict.lsir(X, y) # classification performance measures table by leave-one-out cross-validation (loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat), 2, function(y.pred) { sapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"), function(FUN) { match.fun(FUN)(as.integer(y) - 1L, y.pred) }) })) #> y.hat.3.4 y.hat.15.15 y.hat.20.30 y.hat #> acc 0.79508197 0.72131148 0.78688525 0.4918033 #> err 0.20491803 0.27868852 0.21311475 0.5081967 #> fpr 0.35555556 0.44444444 0.35555556 0.7333333 #> tpr 0.88311688 0.81818182 0.87012987 0.6233766 #> fnr 0.11688312 0.18181818 0.12987013 0.3766234 #> tnr 0.64444444 0.55555556 0.64444444 0.2666667 #> auc 0.84963925 0.81298701 0.83145743 0.3909091 #> auc.sd 0.03639394 0.03998711 0.03815816 0.0540805