239 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			R
		
	
	
	
	
	
			
		
		
	
	
			239 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			R
		
	
	
	
	
	
library(tensorPredictors)
 | 
						|
library(parallel)
 | 
						|
library(pROC)
 | 
						|
 | 
						|
 | 
						|
#' (2D)^2 PCA preprocessing
#'
#' Reduces the first two modes of `X` by projecting each of them onto the
#' leading principal components of its mode-wise covariance.
#'
#' @param X Data array; the third axis is used as the sample axis.
#' @param tpc Number of "t"ime "p"rincipal "c"omponents.
#' @param ppc Number of "p"redictor "p"rincipal "c"omponents.
#'
#' @return The result of `mlm` applied to `X` with the mode-wise principal
#'  components along modes 1 and 2.
preprocess <- function(X, tpc, ppc) {
    # Covariances of the time point mode and the "predictor" (sensor) mode
    c(cov.time, cov.pred) %<-% mcov(X, sample.axis = 3L)

    # Leading left singular vectors of each mode covariance are the mode-wise
    # principal components; right singular vectors are not needed (`nv = 0L`)
    pcs <- list(
        svd(cov.time, nu = tpc, nv = 0L)$u,
        svd(cov.pred, nu = ppc, nv = 0L)$u
    )

    # Reduce modes 1 and 2 with the (transposed) mode-wise PCs
    mlm(X, pcs, modes = 1:2, transposed = TRUE)
}
 | 
						|
 | 
						|
 | 
						|
### Classification performance measures
# All threshold based measures classify via `round(y.pred)`: a prediction that
# rounds to 1 counts as "+", one that rounds to 0 counts as "-". The true
# responses `y.true` are expected to be coded as 0 ("-") and 1 ("+").

# acc: Accuracy. P(Yhat = Y). Estimated as: (TP+TN)/(P+N).
acc <- function(y.true, y.pred) {
    y.class <- round(y.pred)
    mean(y.class == y.true)
}

# err: Error rate. P(Yhat != Y). Estimated as: (FP+FN)/(P+N).
err <- function(y.true, y.pred) {
    y.class <- round(y.pred)
    mean(y.class != y.true)
}

# fpr: False positive rate. P(Yhat = + | Y = -). aliases: Fallout.
fpr <- function(y.true, y.pred) {
    negatives <- y.true == 0
    mean(round(y.pred[negatives]) == 1)
}

# tpr: True positive rate.  P(Yhat = + | Y = +). aliases: Sensitivity, Recall.
tpr <- function(y.true, y.pred) {
    positives <- y.true == 1
    mean(round(y.pred[positives]) == 1)
}

# fnr: False negative rate. P(Yhat = - | Y = +). aliases: Miss.
fnr <- function(y.true, y.pred) {
    positives <- y.true == 1
    mean(round(y.pred[positives]) == 0)
}

# tnr: True negative rate.  P(Yhat = - | Y = -).
tnr <- function(y.true, y.pred) {
    negatives <- y.true == 0
    mean(round(y.pred[negatives]) == 0)
}
 | 
						|
# auc: Area Under the (ROC) Curve, computed by `pROC::roc` with the comparison
#      direction fixed to "<" instead of being chosen automatically.
auc <- function(y.true, y.pred) {
    roc.curve <- pROC::roc(y.true, y.pred, quiet = TRUE, direction = "<")
    as.numeric(roc.curve$auc)
}

# auc.sd: Square root of the variance reported by `pROC::var` for the same
#         ROC curve, i.e. a standard deviation for the AUC.
auc.sd <- function(y.true, y.pred) {
    roc.curve <- pROC::roc(y.true, y.pred, quiet = TRUE, direction = "<")
    auc.var <- pROC::var(roc.curve)
    sqrt(auc.var)
}
 | 
						|
 | 
						|
 | 
						|
# Load 2D (S1 stimulus only) EEG dataset of all subjects
 | 
						|
# `%<-%` is not defined in this file (presumably the multi-assignment operator
# of `tensorPredictors` -- TODO confirm); it is expected to unpack the object
# stored in the RDS file into the data array `X` and the response `y`.
c(X, y) %<-% readRDS("eeg_data_2d.rds")
 | 
						|
 | 
						|
 | 
						|
##################################### GMLM #####################################
 | 
						|
 | 
						|
#' Leave-one-out prediction using GMLM
#'
#' For every observation `i` a tensor-normal GMLM is fitted without that
#' observation, the data is reduced with the fitted `betas`, a logit model is
#' fitted on the reduced data (again without observation `i`) and used to
#' predict the left out response.
#'
#' @param X 3D EEG data (preprocessed or not), observations along the 3rd axis
#' @param y binary response, one entry per observation
#' @param ... additional arguments passed on to `gmlm_tensor_normal`
#'
#' @return vector with the predicted response (`type = "response"` of the
#'  logit model) of every left out observation
loo.predict.gmlm <- function(X, y, ...) {
    # `detectCores()` may return `NA`, in which case a single core is used
    nr.cores <- getOption("mc.cores",
        max(1L, parallel::detectCores() - 1L, na.rm = TRUE)
    )

    unlist(parallel::mclapply(seq_along(y), function(i) {
        # Fit with i'th observation removed
        fit <- gmlm_tensor_normal(X[ , , -i], as.integer(y[-i]), sample.axis = 3L, ...)

        # Reduce the entire data set (including the i'th observation)
        r <- as.vector(mlm(X, fit$betas, modes = 1:2, transposed = TRUE))
        # Fit a logit model on reduced data with i'th observation removed
        logit <- glm(y ~ r, family = binomial(link = "logit"),
            data = data.frame(y = y[-i], r = r[-i])
        )
        # predict i'th response given i'th reduced observation
        y.hat <- predict(logit, newdata = data.frame(r = r[i]), type = "response")

        # report progress
        cat(sprintf("dim: (%d, %d) - %3d/%d\n",
            dim(X)[1L], dim(X)[2L], i, length(y)
        ))

        y.hat
    }, mc.cores = nr.cores))
}
 | 
						|
 | 
						|
#' Projection onto the dominant Fourier components
#'
#' Computes the discrete Fourier transform of `beta1`, sets all but the
#' `nr.freq` coefficients of largest absolute value to zero and transforms
#' back.
#'
#' @param beta1 numeric vector to be projected
#' @param nr.freq number of Fourier coefficients to retain. `0` retains none
#'  (the result is all zeros), values of at least `length(beta1)` retain all.
#'
#' @return real part of the inverse transform of the truncated spectrum
proj.fft <- function(beta1, nr.freq = 5L) {
    spectrum <- fft(beta1)

    # Coefficient indices ordered by increasing magnitude, the last `nr.freq`
    # of them are kept. The number of discarded coefficients is computed
    # explicitly because `head(x, -nr.freq)` discards nothing for `nr.freq = 0`.
    by.magnitude <- order(abs(spectrum))
    discard <- head(by.magnitude, max(0L, length(by.magnitude) - nr.freq))
    spectrum[discard] <- 0+0i

    # the inverse `fft` is unnormalized, hence the division by the length
    Re(fft(spectrum, inverse = TRUE)) / length(spectrum)
}
 | 
						|
 | 
						|
# perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction
y.hat.3.4   <- loo.predict.gmlm(preprocess(X,  3,  4), y)
y.hat.15.15 <- loo.predict.gmlm(preprocess(X, 15, 15), y)
y.hat.20.30 <- loo.predict.gmlm(preprocess(X, 20, 30), y)
y.hat       <- loo.predict.gmlm(X, y)
y.hat.fft   <- loo.predict.gmlm(X, y, proj.betas = list(proj.fft, NULL))

# classification performance measures table by leave-one-out cross-validation
# (one column per prediction vector, one row per performance measure)
(loo.cv <- apply(
    cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat, y.hat.fft), 2,
    function(y.pred) {
        # integer codes of `y` shifted by one, the measures expect 0/1 coding
        y.true <- as.integer(y) - 1L
        # `vapply` enforces that every measure is a single number
        vapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"),
            function(FUN) match.fun(FUN)(y.true, y.pred), numeric(1L))
    }
))
#>         y.hat.3.4 y.hat.15.15 y.hat.20.30      y.hat  y.hat.fft
#> acc    0.79508197  0.78688525  0.78688525 0.78688525 0.81147541
#> err    0.20491803  0.21311475  0.21311475 0.21311475 0.18852459
#> fpr    0.35555556  0.40000000  0.40000000 0.40000000 0.33333333
#> tpr    0.88311688  0.89610390  0.89610390 0.89610390 0.89610390
#> fnr    0.11688312  0.10389610  0.10389610 0.10389610 0.10389610
#> tnr    0.64444444  0.60000000  0.60000000 0.60000000 0.66666667
#> auc    0.85194805  0.83838384  0.83924964 0.83896104 0.84646465
#> auc.sd 0.03574475  0.03760531  0.03751754 0.03754553 0.03751864
 | 
						|
 | 
						|
 | 
						|
################################## Tensor SIR ##################################
 | 
						|
 | 
						|
#' Leave-one-out prediction using TSIR
#'
#' For every observation `i` TSIR is fitted without that observation, the data
#' is reduced with the fitted reduction, a logit model is fitted on the
#' reduced data (again without observation `i`) and used to predict the left
#' out response.
#'
#' @param X 3D EEG data (preprocessed or not), observations along the 3rd axis
#' @param y binary response vector
#' @param cond.threshold passed on to `TSIR`: (approx) condition number
#'  threshold to apply regularization to the mode-wise covariances
#'  `Cov(X_(j))`, a value of `Inf` means "no regularization".
#'
#' @return vector with the predicted response (`type = "response"` of the
#'  logit model) of every left out observation
loo.predict.tsir <- function(X, y, cond.threshold = Inf) {
    # `detectCores()` may return `NA`, in which case a single core is used
    nr.cores <- getOption("mc.cores",
        max(1L, parallel::detectCores() - 1L, na.rm = TRUE)
    )

    unlist(parallel::mclapply(seq_along(y), function(i) {
        # Fit with i'th observation removed
        fit <- TSIR(X[ , , -i], y[-i], sample.axis = 3L,
            cond.threshold = cond.threshold
        )

        # Reduce the entire data set (including the i'th observation)
        r <- as.vector(mlm(X, fit, modes = 1:2, transposed = TRUE))
        # Fit a logit model on reduced data with i'th observation removed
        logit <- glm(y ~ r, family = binomial(link = "logit"),
            data = data.frame(y = y[-i], r = r[-i])
        )
        # predict i'th response given i'th reduced observation
        y.hat <- predict(logit, newdata = data.frame(r = r[i]), type = "response")

        # report progress
        cat(sprintf("dim: (%d, %d) - %3d/%d\n",
            dim(X)[1], dim(X)[2], i, length(y)
        ))

        y.hat
    }, mc.cores = nr.cores))
}
 | 
						|
 | 
						|
# perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction
y.hat.3.4   <- loo.predict.tsir(preprocess(X,  3,  4), y)
y.hat.15.15 <- loo.predict.tsir(preprocess(X, 15, 15), y)
y.hat.20.30 <- loo.predict.tsir(preprocess(X, 20, 30), y)
y.hat       <- loo.predict.tsir(X, y)

# classification performance measures table by leave-one-out cross-validation
# (one column per prediction vector, one row per performance measure)
(loo.cv <- apply(
    cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat), 2,
    function(y.pred) {
        # integer codes of `y` shifted by one, the measures expect 0/1 coding
        y.true <- as.integer(y) - 1L
        # `vapply` enforces that every measure is a single number
        vapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"),
            function(FUN) match.fun(FUN)(y.true, y.pred), numeric(1L))
    }
))
#>         y.hat.3.4 y.hat.15.15 y.hat.20.30      y.hat
#> acc    0.79508197  0.78688525   0.7540984 0.67213115
#> err    0.20491803  0.21311475   0.2459016 0.32786885
#> fpr    0.33333333  0.37777778   0.3777778 0.53333333
#> tpr    0.87012987  0.88311688   0.8311688 0.79220779
#> fnr    0.12987013  0.11688312   0.1688312 0.20779221
#> tnr    0.66666667  0.62222222   0.6222222 0.46666667
#> auc    0.84646465  0.83376623   0.8040404 0.68946609
#> auc.sd 0.03596227  0.04092069   0.0446129 0.05196611
 | 
						|
 | 
						|
 | 
						|
# perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction
# including mode-wise covariance regularization via condition threshold similar
# to the regularization employed by tensor-normal GMLM
y.hat.3.4   <- loo.predict.tsir(preprocess(X,  3,  4), y, cond.threshold = 25)
y.hat.15.15 <- loo.predict.tsir(preprocess(X, 15, 15), y, cond.threshold = 25)
y.hat.20.30 <- loo.predict.tsir(preprocess(X, 20, 30), y, cond.threshold = 25)
y.hat       <- loo.predict.tsir(X, y, cond.threshold = 25)

# classification performance measures table by leave-one-out cross-validation
# (one column per prediction vector, one row per performance measure)
(loo.cv <- apply(
    cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat), 2,
    function(y.pred) {
        # integer codes of `y` shifted by one, the measures expect 0/1 coding
        y.true <- as.integer(y) - 1L
        # `vapply` enforces that every measure is a single number
        vapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"),
            function(FUN) match.fun(FUN)(y.true, y.pred), numeric(1L))
    }
))
#>         y.hat.3.4 y.hat.15.15 y.hat.20.30      y.hat
#> acc    0.79508197  0.78688525  0.78688525 0.78688525
#> err    0.20491803  0.21311475  0.21311475 0.21311475
#> fpr    0.33333333  0.40000000  0.40000000 0.40000000
#> tpr    0.87012987  0.89610390  0.89610390 0.89610390
#> fnr    0.12987013  0.10389610  0.10389610 0.10389610
#> tnr    0.66666667  0.60000000  0.60000000 0.60000000
#> auc    0.84646465  0.84329004  0.84444444 0.84386724
#> auc.sd 0.03596227  0.03666439  0.03636842 0.03650638
 | 
						|
 | 
						|
 | 
						|
##################################### LSIR #####################################
 | 
						|
 | 
						|
#' Leave-one-out prediction using LSIR
#'
#' For every observation `i` LSIR is fitted without that observation, the data
#' is reduced with the fitted `betas`, a logit model is fitted on the reduced
#' data (again without observation `i`) and used to predict the left out
#' response.
#'
#' @param X 3D EEG data (preprocessed or not), observations along the 3rd axis
#' @param y binary response vector
#'
#' @return vector with the predicted response (`type = "response"` of the
#'  logit model) of every left out observation
loo.predict.lsir <- function(X, y) {
    # `detectCores()` may return `NA`, in which case a single core is used
    nr.cores <- getOption("mc.cores",
        max(1L, parallel::detectCores() - 1L, na.rm = TRUE)
    )

    unlist(parallel::mclapply(seq_along(y), function(i) {
        # Fit with i'th observation removed
        fit <- LSIR(X[ , , -i], y[-i], sample.axis = 3L)

        # Reduce the entire data set (including the i'th observation)
        r <- as.vector(mlm(X, fit$betas, modes = 1:2, transposed = TRUE))
        # Fit a logit model on reduced data with i'th observation removed
        logit <- glm(y ~ r, family = binomial(link = "logit"),
            data = data.frame(y = y[-i], r = r[-i])
        )
        # predict i'th response given i'th reduced observation
        y.hat <- predict(logit, newdata = data.frame(r = r[i]), type = "response")

        # report progress
        cat(sprintf("dim: (%d, %d) - %3d/%d\n",
            dim(X)[1], dim(X)[2], i, length(y)
        ))

        y.hat
    }, mc.cores = nr.cores))
}
 | 
						|
 | 
						|
# perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction
y.hat.3.4   <- loo.predict.lsir(preprocess(X,  3,  4), y)
y.hat.15.15 <- loo.predict.lsir(preprocess(X, 15, 15), y)
y.hat.20.30 <- loo.predict.lsir(preprocess(X, 20, 30), y)
y.hat       <- loo.predict.lsir(X, y)


# classification performance measures table by leave-one-out cross-validation
# (one column per prediction vector, one row per performance measure)
(loo.cv <- apply(
    cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat), 2,
    function(y.pred) {
        # integer codes of `y` shifted by one, the measures expect 0/1 coding
        y.true <- as.integer(y) - 1L
        # `vapply` enforces that every measure is a single number
        vapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"),
            function(FUN) match.fun(FUN)(y.true, y.pred), numeric(1L))
    }
))
#>         y.hat.3.4 y.hat.15.15 y.hat.20.30     y.hat
#> acc    0.79508197  0.72131148  0.78688525 0.4918033
#> err    0.20491803  0.27868852  0.21311475 0.5081967
#> fpr    0.35555556  0.44444444  0.35555556 0.7333333
#> tpr    0.88311688  0.81818182  0.87012987 0.6233766
#> fnr    0.11688312  0.18181818  0.12987013 0.3766234
#> tnr    0.64444444  0.55555556  0.64444444 0.2666667
#> auc    0.84963925  0.81298701  0.83145743 0.3909091
#> auc.sd 0.03639394  0.03998711  0.03815816 0.0540805
 |