From 76d705035faf0d31042ba6b526036c8cf9563396 Mon Sep 17 00:00:00 2001
From: daniel
Date: Wed, 5 Feb 2025 13:49:35 +0100
Subject: [PATCH] added eeg data and tsir to its analysis

---
 .gitattributes                |   2 +
 .gitignore                    |   4 +-
 dataAnalysis/eeg/eeg.R        | 129 +++++++++++++++++++++++-----------
 dataAnalysis/eeg/eeg_data.rds |   3 +
 4 files changed, 96 insertions(+), 42 deletions(-)
 create mode 100644 .gitattributes
 create mode 100644 dataAnalysis/eeg/eeg_data.rds

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..25baab1
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,2 @@
+*.rds filter=lfs diff=lfs merge=lfs -text
+*.Rds filter=lfs diff=lfs merge=lfs -text
diff --git a/.gitignore b/.gitignore
index d332568..c6a1db2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -45,8 +45,8 @@
 ## R environment, data and package build intermediate files/folders
 
 # R Data Object files
-*.Rds
-*.rds
+# *.Rds # git-lfs
+# *.rds # git-lfs
 *.Rdata
 
 # Example code in package build process
diff --git a/dataAnalysis/eeg/eeg.R b/dataAnalysis/eeg/eeg.R
index 468c81f..a529f4f 100644
--- a/dataAnalysis/eeg/eeg.R
+++ b/dataAnalysis/eeg/eeg.R
@@ -25,17 +25,6 @@ c(X, F, y) %<-% local({
   list(X, array(y, c(n, 1L, 1L)), y)
 })
 
-# fit a tensor normal model to the data sample axis 1 indexes persons)
-fit.gmlm <- gmlm_tensor_normal(X, F, sample.axis = 1L)
-
-# plot the fitted mode wise reductions (for time and sensor axis)
-with(fit.gmlm, {
-  par.reset <- par(mfrow = c(2, 1))
-  plot(seq(0, 1, len = 256), betas[[1]], main = "Time", xlab = "Time [s]", ylab = expression(beta[1]))
-  plot(betas[[2]], main = "Sensors", xlab = "Sensor Index", ylab = expression(beta[2]))
-  par(par.reset)
-})
-
 #' (2D)^2 PCA preprocessing
 #'
 #'
@@ -54,31 +43,6 @@ preprocess <- function(X, tpc, ppc) {
 }
 
 
-#' Leave-one-out prediction
-#'
-#' @param X 3D EEG data (preprocessed or not)
-#' @param F binary responce `y` as a 3D tensor, every obs. is a 1 x 1 matrix
-loo.predict <- function(X, F) {
-  sapply(seq_len(dim(X)[1L]), function(i) {
-    # Fit with i'th observation removes
-    fit <- gmlm_tensor_normal(X[-i, , ], F[-i, , , drop = FALSE], sample.axis = 1L)
-
-    # Reduce the entire data set
-    r <- as.vector(mlm(X, fit$betas, modes = 2:3, transpose = TRUE))
-    # Fit a logit model on reduced data with i'th observation removed
-    logit <- glm(y ~ r, family = binomial(link = "logit"),
-      data = data.frame(y = y[-i], r = r[-i])
-    )
-    # predict i'th response given i'th reduced observation
-    y.hat <- predict(logit, newdata = data.frame(r = r[i]), type = "response")
-    # report progress
-    cat(sprintf("dim: (%d, %d) - %3d/%d\n", dim(X)[2L], dim(X)[3L], i, dim(X)[1L]))
-
-    y.hat
-  })
-}
-
-
 ### Classification performance measures
 # acc: Accuracy. P(Yhat = Y). Estimated as: (TP+TN)/(P+N).
 acc <- function(y.true, y.pred) mean(round(y.pred) == y.true)
@@ -97,11 +61,48 @@ auc <- function(y.true, y.pred) as.numeric(pROC::roc(y.true, y.pred, quiet = TRU
 auc.sd <- function(y.true, y.pred) sqrt(pROC::var(pROC::roc(y.true, y.pred, quiet = TRUE)))
 
 
+##################################### GMLM #####################################
+
+# fit a tensor normal model to the data (sample axis 1 indexes persons)
+fit.gmlm <- gmlm_tensor_normal(X, F, sample.axis = 1L)
+
+# plot the fitted mode-wise reductions (for the time and sensor axes)
+with(fit.gmlm, {
+  par.reset <- par(mfrow = c(2, 1))
+  plot(seq(0, 1, len = 256), betas[[1]], main = "Time", xlab = "Time [s]", ylab = expression(beta[1]))
+  plot(betas[[2]], main = "Sensors", xlab = "Sensor Index", ylab = expression(beta[2]))
+  par(par.reset)
+})
+
+#' Leave-one-out prediction using GMLM
+#'
+#' @param X 3D EEG data (preprocessed or not)
+#' @param F binary response `y` as a 3D tensor, every obs. is a 1 x 1 matrix
+loo.predict.gmlm <- function(X, F) {
+  unlist(parallel::mclapply(seq_len(dim(X)[1L]), function(i) {
+    # Fit with i'th observation removed
+    fit <- gmlm_tensor_normal(X[-i, , ], F[-i, , , drop = FALSE], sample.axis = 1L)
+
+    # Reduce the entire data set
+    r <- as.vector(mlm(X, fit$betas, modes = 2:3, transpose = TRUE))
+    # Fit a logit model on reduced data with i'th observation removed
+    logit <- glm(y ~ r, family = binomial(link = "logit"),
+      data = data.frame(y = y[-i], r = r[-i])
+    )
+    # predict i'th response given i'th reduced observation
+    y.hat <- predict(logit, newdata = data.frame(r = r[i]), type = "response")
+    # report progress
+    cat(sprintf("dim: (%d, %d) - %3d/%d\n", dim(X)[2L], dim(X)[3L], i, dim(X)[1L]))
+
+    y.hat
+  }, mc.cores = getOption("mc.cores", max(1L, parallel::detectCores() - 1L))))
+}
+
 # perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction
-y.hat.3.4 <- loo.predict(preprocess(X, 3, 4), F)
-y.hat.15.15 <- loo.predict(preprocess(X, 15, 15), F)
-y.hat.20.30 <- loo.predict(preprocess(X, 20, 30), F)
-y.hat <- loo.predict(X, F)
+y.hat.3.4 <- loo.predict.gmlm(preprocess(X, 3, 4), F)
+y.hat.15.15 <- loo.predict.gmlm(preprocess(X, 15, 15), F)
+y.hat.20.30 <- loo.predict.gmlm(preprocess(X, 20, 30), F)
+y.hat <- loo.predict.gmlm(X, F)
 
 # classification performance measures table by leave-one-out cross-validation
 (loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat), 2, function(y.pred) {
@@ -117,3 +118,51 @@ y.hat <- loo.predict(X, F)
 #> tnr    0.64444444  0.60000000  0.60000000 0.60000000
 #> auc    0.85108225  0.83838384  0.83924964 0.83896104
 #> auc.sd 0.03584791  0.03760531  0.03751307 0.03754553
+
+
+################################## Tensor SIR ##################################
+
+#' Leave-one-out prediction using TSIR
+#'
+#' @param X 3D EEG data (preprocessed or not)
+#' @param y binary response vector
+loo.predict.tsir <- function(X, y) {
+  unlist(parallel::mclapply(seq_len(dim(X)[1L]), function(i) {
+    # Fit with i'th observation removed
+    fit <- TSIR(X[-i, , ], y[-i], c(1L, 1L), sample.axis = 1L)
+
+    # Reduce the entire data set
+    r <- as.vector(mlm(X, fit, modes = 2:3, transpose = TRUE))
+    # Fit a logit model on reduced data with i'th observation removed
+    logit <- glm(y ~ r, family = binomial(link = "logit"),
+      data = data.frame(y = y[-i], r = r[-i])
+    )
+    # predict i'th response given i'th reduced observation
+    y.hat <- predict(logit, newdata = data.frame(r = r[i]), type = "response")
+    # report progress
+    cat(sprintf("dim: (%d, %d) - %3d/%d\n", dim(X)[2L], dim(X)[3L], i, dim(X)[1L]))
+
+    y.hat
+  }, mc.cores = getOption("mc.cores", max(1L, parallel::detectCores() - 1L))))
+}
+
+# perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction
+y.hat.3.4 <- loo.predict.tsir(preprocess(X, 3, 4), y)
+y.hat.15.15 <- loo.predict.tsir(preprocess(X, 15, 15), y)
+y.hat.20.30 <- loo.predict.tsir(preprocess(X, 20, 30), y)
+y.hat <- loo.predict.tsir(X, y)
+
+# classification performance measures table by leave-one-out cross-validation
+(loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat), 2, function(y.pred) {
+  sapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"),
+    function(FUN) { match.fun(FUN)(y, y.pred) })
+}))
+#>         y.hat.3.4 y.hat.15.15 y.hat.20.30      y.hat
+#> acc    0.79508197  0.78688525   0.7540984 0.67213115
+#> err    0.20491803  0.21311475   0.2459016 0.32786885
+#> fpr    0.33333333  0.37777778   0.3777778 0.53333333
+#> tpr    0.87012987  0.88311688   0.8311688 0.79220779
+#> fnr    0.12987013  0.11688312   0.1688312 0.20779221
+#> tnr    0.66666667  0.62222222   0.6222222 0.46666667
+#> auc    0.84646465  0.83376623   0.8040404 0.68946609
+#> auc.sd 0.03596227  0.04092069   0.0446129 0.05196611
diff --git a/dataAnalysis/eeg/eeg_data.rds b/dataAnalysis/eeg/eeg_data.rds
new file mode 100644
index 0000000..bda2a74
--- /dev/null
+++ b/dataAnalysis/eeg/eeg_data.rds
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84d2ce18ebe1beae4cb8010342d37a564b7ec99a8c6abedd32b6d4c011b6a1a4
+size 11421369
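
Usage sketch (not part of the patch itself): one way to fetch the LFS-tracked EEG data and run the new analysis after checking out this commit. The devtools::load_all(".") step is an assumption; it presumes the functions gmlm_tensor_normal, TSIR and mlm are provided by package sources at the repository root. Adjust to however the package is actually installed.

# shell, once, to materialise the LFS pointer for dataAnalysis/eeg/eeg_data.rds:
#   git lfs install && git lfs pull

# R, from the repository root:
eeg <- readRDS("dataAnalysis/eeg/eeg_data.rds")  # data file added by this patch
str(eeg)                                         # inspect the object before running the script
# devtools::load_all(".")                        # assumption: gmlm_tensor_normal, TSIR, mlm live here
source("dataAnalysis/eeg/eeg.R")                 # runs both the GMLM and TSIR leave-one-out analyses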