From 76d705035faf0d31042ba6b526036c8cf9563396 Mon Sep 17 00:00:00 2001
From: daniel
Date: Wed, 5 Feb 2025 13:49:35 +0100
Subject: [PATCH] added eeg data and tsir to its analysis

---
 .gitattributes                |   2 +
 .gitignore                    |   4 +-
 dataAnalysis/eeg/eeg.R        | 129 +++++++++++++++++++++++-----------
 dataAnalysis/eeg/eeg_data.rds |   3 +
 4 files changed, 96 insertions(+), 42 deletions(-)
 create mode 100644 .gitattributes
 create mode 100644 dataAnalysis/eeg/eeg_data.rds

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..25baab1
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,2 @@
+*.rds filter=lfs diff=lfs merge=lfs -text
+*.Rds filter=lfs diff=lfs merge=lfs -text
diff --git a/.gitignore b/.gitignore
index d332568..c6a1db2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -45,8 +45,8 @@
 ## R environment, data and package build intermediate files/folders
 
 # R Data Object files
-*.Rds
-*.rds
+# *.Rds # git-lfs
+# *.rds # git-lfs
 *.Rdata
 
 # Example code in package build process
diff --git a/dataAnalysis/eeg/eeg.R b/dataAnalysis/eeg/eeg.R
index 468c81f..a529f4f 100644
--- a/dataAnalysis/eeg/eeg.R
+++ b/dataAnalysis/eeg/eeg.R
@@ -25,17 +25,6 @@ c(X, F, y) %<-% local({
   list(X, array(y, c(n, 1L, 1L)), y)
 })
 
-# fit a tensor normal model to the data sample axis 1 indexes persons)
-fit.gmlm <- gmlm_tensor_normal(X, F, sample.axis = 1L)
-
-# plot the fitted mode wise reductions (for time and sensor axis)
-with(fit.gmlm, {
-  par.reset <- par(mfrow = c(2, 1))
-  plot(seq(0, 1, len = 256), betas[[1]], main = "Time", xlab = "Time [s]", ylab = expression(beta[1]))
-  plot(betas[[2]], main = "Sensors", xlab = "Sensor Index", ylab = expression(beta[2]))
-  par(par.reset)
-})
-
 #' (2D)^2 PCA preprocessing
 #'
 #'
@@ -54,31 +43,6 @@ preprocess <- function(X, tpc, ppc) {
 }
 
 
-#' Leave-one-out prediction
-#'
-#' @param X 3D EEG data (preprocessed or not)
-#' @param F binary responce `y` as a 3D tensor, every obs. is a 1 x 1 matrix
-loo.predict <- function(X, F) {
-  sapply(seq_len(dim(X)[1L]), function(i) {
-    # Fit with i'th observation removes
-    fit <- gmlm_tensor_normal(X[-i, , ], F[-i, , , drop = FALSE], sample.axis = 1L)
-
-    # Reduce the entire data set
-    r <- as.vector(mlm(X, fit$betas, modes = 2:3, transpose = TRUE))
-    # Fit a logit model on reduced data with i'th observation removed
-    logit <- glm(y ~ r, family = binomial(link = "logit"),
-      data = data.frame(y = y[-i], r = r[-i])
-    )
-    # predict i'th response given i'th reduced observation
-    y.hat <- predict(logit, newdata = data.frame(r = r[i]), type = "response")
-    # report progress
-    cat(sprintf("dim: (%d, %d) - %3d/%d\n", dim(X)[2L], dim(X)[3L], i, dim(X)[1L]))
-
-    y.hat
-  })
-}
-
-
 ### Classification performance measures
 # acc: Accuracy. P(Yhat = Y). Estimated as: (TP+TN)/(P+N).
 acc <- function(y.true, y.pred) mean(round(y.pred) == y.true)
@@ -97,11 +61,48 @@ auc <- function(y.true, y.pred) as.numeric(pROC::roc(y.true, y.pred, quiet = TRU
 auc.sd <- function(y.true, y.pred) sqrt(pROC::var(pROC::roc(y.true, y.pred, quiet = TRUE)))
 
 
+##################################### GMLM #####################################
+
+# fit a tensor normal model to the data (sample axis 1 indexes persons)
+fit.gmlm <- gmlm_tensor_normal(X, F, sample.axis = 1L)
+
+# plot the fitted mode-wise reductions (for the time and sensor axes)
+with(fit.gmlm, {
+  par.reset <- par(mfrow = c(2, 1))
+  plot(seq(0, 1, len = 256), betas[[1]], main = "Time", xlab = "Time [s]", ylab = expression(beta[1]))
+  plot(betas[[2]], main = "Sensors", xlab = "Sensor Index", ylab = expression(beta[2]))
+  par(par.reset)
+})
+
+#' Leave-one-out prediction using GMLM
+#'
+#' @param X 3D EEG data (preprocessed or not)
+#' @param F binary response `y` as a 3D tensor, every obs. is a 1 x 1 matrix
+loo.predict.gmlm <- function(X, F) {
+  unlist(parallel::mclapply(seq_len(dim(X)[1L]), function(i) {
+    # Fit with i'th observation removed
+    fit <- gmlm_tensor_normal(X[-i, , ], F[-i, , , drop = FALSE], sample.axis = 1L)
+
+    # Reduce the entire data set
+    r <- as.vector(mlm(X, fit$betas, modes = 2:3, transpose = TRUE))
+    # Fit a logit model on reduced data with i'th observation removed
+    logit <- glm(y ~ r, family = binomial(link = "logit"),
+      data = data.frame(y = y[-i], r = r[-i])
+    )
+    # predict i'th response given i'th reduced observation
+    y.hat <- predict(logit, newdata = data.frame(r = r[i]), type = "response")
+    # report progress
+    cat(sprintf("dim: (%d, %d) - %3d/%d\n", dim(X)[2L], dim(X)[3L], i, dim(X)[1L]))
+
+    y.hat
+  }, mc.cores = getOption("mc.cores", max(1L, parallel::detectCores() - 1L))))
+}
+
 # perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction
-y.hat.3.4 <- loo.predict(preprocess(X, 3, 4), F)
-y.hat.15.15 <- loo.predict(preprocess(X, 15, 15), F)
-y.hat.20.30 <- loo.predict(preprocess(X, 20, 30), F)
-y.hat <- loo.predict(X, F)
+y.hat.3.4 <- loo.predict.gmlm(preprocess(X, 3, 4), F)
+y.hat.15.15 <- loo.predict.gmlm(preprocess(X, 15, 15), F)
+y.hat.20.30 <- loo.predict.gmlm(preprocess(X, 20, 30), F)
+y.hat <- loo.predict.gmlm(X, F)
 
 # classification performance measures table by leave-one-out cross-validation
 (loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat), 2, function(y.pred) {
@@ -117,3 +118,51 @@ y.hat <- loo.predict(X, F)
 #> tnr    0.64444444  0.60000000  0.60000000 0.60000000
 #> auc    0.85108225  0.83838384  0.83924964 0.83896104
 #> auc.sd 0.03584791  0.03760531  0.03751307 0.03754553
+
+
+################################## Tensor SIR ##################################
+
+#' Leave-one-out prediction using TSIR
+#'
+#' @param X 3D EEG data (preprocessed or not)
+#' @param y binary response vector
+loo.predict.tsir <- function(X, y) {
+  unlist(parallel::mclapply(seq_len(dim(X)[1L]), function(i) {
+    # Fit with i'th observation removed
+    fit <- TSIR(X[-i, , ], y[-i], c(1L, 1L), sample.axis = 1L)
+
+    # Reduce the entire data set
+    r <- as.vector(mlm(X, fit, modes = 2:3, transpose = TRUE))
+    # Fit a logit model on reduced data with i'th observation removed
+    logit <- glm(y ~ r, family = binomial(link = "logit"),
+      data = data.frame(y = y[-i], r = r[-i])
+    )
+    # predict i'th response given i'th reduced observation
+    y.hat <- predict(logit, newdata = data.frame(r = r[i]), type = "response")
+    # report progress
+    cat(sprintf("dim: (%d, %d) - %3d/%d\n", dim(X)[2L], dim(X)[3L], i, dim(X)[1L]))
+
+    y.hat
+  }, mc.cores = getOption("mc.cores", max(1L, parallel::detectCores() - 1L))))
+}
+
+# perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction
+y.hat.3.4 <- loo.predict.tsir(preprocess(X, 3, 4), y)
+y.hat.15.15 <- loo.predict.tsir(preprocess(X, 15, 15), y)
+y.hat.20.30 <- loo.predict.tsir(preprocess(X, 20, 30), y)
+y.hat <- loo.predict.tsir(X, y)
+
+# classification performance measures table by leave-one-out cross-validation
+(loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat), 2, function(y.pred) {
+  sapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"),
+    function(FUN) { match.fun(FUN)(y, y.pred) })
+}))
+#>         y.hat.3.4 y.hat.15.15 y.hat.20.30      y.hat
+#> acc    0.79508197  0.78688525   0.7540984 0.67213115
+#> err    0.20491803  0.21311475   0.2459016 0.32786885
+#> fpr    0.33333333  0.37777778   0.3777778 0.53333333
+#> tpr    0.87012987  0.88311688   0.8311688 0.79220779
+#> fnr    0.12987013  0.11688312   0.1688312 0.20779221
+#> tnr    0.66666667  0.62222222   0.6222222 0.46666667
+#> auc    0.84646465  0.83376623   0.8040404 0.68946609
+#> auc.sd 0.03596227  0.04092069   0.0446129 0.05196611
diff --git a/dataAnalysis/eeg/eeg_data.rds b/dataAnalysis/eeg/eeg_data.rds
new file mode 100644
index 0000000..bda2a74
--- /dev/null
+++ b/dataAnalysis/eeg/eeg_data.rds
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84d2ce18ebe1beae4cb8010342d37a564b7ec99a8c6abedd32b6d4c011b6a1a4
+size 11421369
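
Usage sketch (not part of the patch itself): one way to fetch the LFS-tracked EEG data and run the new analysis after checking out this commit. The devtools::load_all(".") step is an assumption; it presumes the functions gmlm_tensor_normal, TSIR and mlm are provided by package sources at the repository root. Adjust to however the package is actually installed.

# shell, once, to materialise the LFS pointer for dataAnalysis/eeg/eeg_data.rds:
#   git lfs install && git lfs pull

# R, from the repository root:
eeg <- readRDS("dataAnalysis/eeg/eeg_data.rds")  # data file added by this patch
str(eeg)                                         # inspect the object before running the script
# devtools::load_all(".")                        # assumption: gmlm_tensor_normal, TSIR, mlm live here
source("dataAnalysis/eeg/eeg.R")                 # runs both the GMLM and TSIR leave-one-out analyses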