add: reg.factor (regularization factor), add: FFT to EEG sim
Daniel Kapla 2025-11-05 17:21:33 +01:00
parent 491688378c
commit 82816e96a6
4 changed files with 51 additions and 56 deletions


@@ -52,10 +52,10 @@ c(X, y) %<-% readRDS("eeg_data_2d.rds")
 #'
 #' @param X 3D EEG data (preprocessed or not)
 #' @param y binary response as a 3D tensor, every obs. is a 1 x 1 matrix
-loo.predict.gmlm <- function(X, y) {
+loo.predict.gmlm <- function(X, y, ...) {
     unlist(parallel::mclapply(seq_along(y), function(i) {
         # Fit with i'th observation removed
-        fit <- gmlm_tensor_normal(X[ , , -i], as.integer(y[-i]), sample.axis = 3L)
+        fit <- gmlm_tensor_normal(X[ , , -i], as.integer(y[-i]), sample.axis = 3L, ...)
         # Reduce the entire data set
         r <- as.vector(mlm(X, fit$betas, modes = 1:2, transpose = TRUE))
@@ -75,26 +75,35 @@ loo.predict.gmlm <- function(X, y) {
     }, mc.cores = getOption("mc.cores", max(1L, parallel::detectCores() - 1L))))
 }
 
+proj.fft <- function(beta1, nr.freq = 5L) {
+    F <- fft(beta1)
+    Re(fft(`[<-`(F, head(order(abs(F)), -nr.freq), 0+0i), inverse = TRUE)) / length(F)
+}
+
 # perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction
 y.hat.3.4   <- loo.predict.gmlm(preprocess(X, 3, 4), y)
 y.hat.15.15 <- loo.predict.gmlm(preprocess(X, 15, 15), y)
 y.hat.20.30 <- loo.predict.gmlm(preprocess(X, 20, 30), y)
 y.hat       <- loo.predict.gmlm(X, y)
+y.hat.fft   <- loo.predict.gmlm(X, y, proj.betas = list(proj.fft, NULL))
 
 # classification performance measures table by leave-one-out cross-validation
-(loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat), 2, function(y.pred) {
-    sapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"),
-        function(FUN) { match.fun(FUN)(as.integer(y) - 1L, y.pred) })
-}))
-#>         y.hat.3.4 y.hat.15.15 y.hat.20.30      y.hat
-#> acc    0.79508197  0.78688525  0.78688525 0.78688525
-#> err    0.20491803  0.21311475  0.21311475 0.21311475
-#> fpr    0.35555556  0.40000000  0.40000000 0.40000000
-#> tpr    0.88311688  0.89610390  0.89610390 0.89610390
-#> fnr    0.11688312  0.10389610  0.10389610 0.10389610
-#> tnr    0.64444444  0.60000000  0.60000000 0.60000000
-#> auc    0.85108225  0.83838384  0.83924964 0.83896104
-#> auc.sd 0.03584791  0.03760531  0.03751307 0.03754553
+(loo.cv <- apply(
+    cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat, y.hat.fft), 2,
+    function(y.pred) {
+        sapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"),
+            function(FUN) { match.fun(FUN)(as.integer(y) - 1L, y.pred) })
+    }
+))
+#>         y.hat.3.4 y.hat.15.15 y.hat.20.30      y.hat  y.hat.fft
+#> acc    0.79508197  0.78688525  0.78688525 0.78688525 0.81147541
+#> err    0.20491803  0.21311475  0.21311475 0.21311475 0.18852459
+#> fpr    0.35555556  0.40000000  0.40000000 0.40000000 0.33333333
+#> tpr    0.88311688  0.89610390  0.89610390 0.89610390 0.89610390
+#> fnr    0.11688312  0.10389610  0.10389610 0.10389610 0.10389610
+#> tnr    0.64444444  0.60000000  0.60000000 0.60000000 0.66666667
+#> auc    0.85194805  0.83838384  0.83924964 0.83896104 0.84646465
+#> auc.sd 0.03574475  0.03760531  0.03751754 0.03754553 0.03751864
 
 ################################## Tensor SIR ##################################
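
Note: `loo.predict.gmlm` now forwards extra arguments via `...` to `gmlm_tensor_normal`, which is how `proj.betas = list(proj.fft, NULL)` reaches the fit, a projection for the first mode (presumably the temporal axis) and none for the second. `proj.fft` itself is a hard frequency threshold: it keeps the `nr.freq` Fourier coefficients of `beta1` with the largest magnitude, zeros the rest, and transforms back, where the division by `length(F)` compensates for R's unnormalized inverse `fft()`. A spelled-out, standalone sketch of the same operation (the name `proj.fft.demo` and the toy signal are illustrative, not part of the commit):

# Long form of the one-liner in `proj.fft`, same result
proj.fft.demo <- function(beta1, nr.freq = 5L) {
    F <- fft(beta1)
    keep <- tail(order(abs(F)), nr.freq)    # indices of the dominant frequencies
    F[-keep] <- 0+0i                        # zero out all other coefficients
    Re(fft(F, inverse = TRUE)) / length(F)  # rescale the unnormalized inverse FFT
}
x <- sin(seq(0, 4 * pi, length.out = 64)) + rnorm(64, sd = 0.3)
x.smooth <- proj.fft.demo(x)    # low-frequency reconstruction of the noisy toy signal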


@@ -42,29 +42,6 @@ auc.sd <- function(y.true, y.pred) {
     sqrt(pROC::var(pROC::roc(y.true, y.pred, quiet = TRUE, direction = "<")))
 }
 
-# # unified API for all reduction procedures
-# GMLM <- list(
-#     fit = function(X, y) tensorPredictors::gmlm_tensor_normal(X, as.integer(y), sample.axis = 4L),
-#     reduce = function(X, fit) mlm(X, fit$betas, 1:3, TRUE),
-#     applicable = function(X) TRUE
-# )
-# TSIR <- list(
-#     fit = function(X, y) tensorPredictors::TSIR(X, y, c(1L, 1L, 1L), sample.axis = 4L),
-#     reduce = function(X, fit) mlm(X, fit, 1:3, TRUE),
-#     applicable = function(X) TRUE
-# )
-# KPIR_LS <- list(
-#     fit = function(X, y) {
-#         if (any(dim(X)[-4] > dim(X)[4])) {
-#             stop("Dimensions too big")
-#         }
-#         tensorPredictors::kpir.ls(X, as.integer(y), sample.axis = 4L)
-#     },
-#     reduce = function(X, fit) if (is.null(fit)) NA else mlm(X, fit$alphas, 1:3, TRUE),
-#     applicable = function(X) all(dim(X)[1:3] <= dim(X)[4])
-# )
 
 #' Leave-one-out prediction using a given reduction method
 #'
 #' @param method reduction method to be applied
@@ -105,14 +82,20 @@ c(X, y) %<-% readRDS("eeg_data_3d.rds")
 ##################################### GMLM #####################################
 
+proj.fft <- function(beta1, nr.freq = 5L) {
+    F <- fft(beta1)
+    Re(fft(`[<-`(F, head(order(abs(F)), -nr.freq), 0+0i), inverse = TRUE)) / length(F)
+}
+
 # perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction
 y.hat.3.4   <- loo.predict(gmlm_tensor_normal, preprocess(X, 3, 4, 3), y)
 y.hat.15.15 <- loo.predict(gmlm_tensor_normal, preprocess(X, 15, 15, 3), y)
 y.hat.20.30 <- loo.predict(gmlm_tensor_normal, preprocess(X, 20, 30, 3), y)
 y.hat       <- loo.predict(gmlm_tensor_normal, X, y)
+y.hat.fft   <- loo.predict(gmlm_tensor_normal, X, y, proj.betas = list(proj.fft, NULL, NULL))
 
 # classification performance measures table by leave-one-out cross-validation
-(loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat), 2, function(y.pred) {
+(loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat, y.hat.fft), 2, function(y.pred) {
     sapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"),
         function(FUN) { match.fun(FUN)(as.integer(y) - 1L, y.pred) })
 }))
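
Note: in the 3D script the projection list carries one entry per predictor mode, so `proj.betas = list(proj.fft, NULL, NULL)` thresholds only the first mode and leaves the other two untouched. A minimal sketch of how such mode-wise hooks are typically dispatched (an illustrative helper under assumed semantics, not the actual tensorPredictors internals):

# Apply an optional projection to each mode's parameter matrix;
# a NULL entry means "no projection" for that mode.
apply.mode.projections <- function(betas, projections) {
    Map(function(beta, proj) if (is.null(proj)) beta else proj(beta), betas, projections)
}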


@@ -6,7 +6,8 @@ TSIR <- function(X, y,
     sample.axis = 1L,
     nr.slices = 10L,    # default slices, ignored if y is a factor or integer
     max.iter = 50L,
-    cond.threshold = Inf, eps = sqrt(.Machine$double.eps),
+    reg.threshold = Inf, reg.factor = 0.2,
+    eps = 1e-6,
     slice.method = c("cut", "ecdf"),    # ignored if y is a factor or integer
     logger = NULL
 ) {
@@ -122,13 +123,13 @@ TSIR <- function(X, y,
     Omegas <- Map(function(k) prod(dim(X)[-k])^-1 * mcrossprod(X, mode = k), modes)
 
     # Check condition of sample covariances and perform regularization if needed
-    if (is.finite(cond.threshold)) {
+    if (is.finite(reg.threshold)) {
         for (j in seq_along(Omegas)) {
             # Compute min and max eigenvalues
             min_max <- range(eigen(Omegas[[j]], TRUE, TRUE)$values)
-            # The condition is approximately `kappa(Omegas[[j]]) > cond.threshold`
-            if (min_max[2] > cond.threshold * min_max[1]) {
-                Omegas[[j]] <- Omegas[[j]] + diag(0.2 * min_max[2], nrow(Omegas[[j]]))
+            # The condition is approximately `kappa(Omegas[[j]]) > reg.threshold`
+            if (min_max[2] > reg.threshold * min_max[1]) {
+                diag(Omegas[[j]]) <- diag(Omegas[[j]]) + reg.factor * min_max[2]
             }
         }
     }
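
Note: `TSIR` and `gmlm_tensor_normal` now share the same regularization rule: when the eigenvalue ratio lambda_max / lambda_min, which for a symmetric positive definite matrix is its condition number, exceeds `reg.threshold`, the diagonal is inflated by `reg.factor * lambda_max` (previously a hard-coded factor of 0.2). A self-contained sketch with the defaults used in `gmlm_tensor_normal` (the function name is illustrative):

regularize <- function(S, reg.threshold = 25, reg.factor = 0.2) {
    min_max <- range(eigen(S, symmetric = TRUE, only.values = TRUE)$values)
    if (is.finite(reg.threshold) && min_max[2] > reg.threshold * min_max[1]) {
        diag(S) <- diag(S) + reg.factor * min_max[2]
    }
    S
}
# After the shift the condition number is bounded by roughly
# (1 + reg.factor) / reg.factor, i.e. 6 for reg.factor = 0.2, since
# (l.max + r * l.max) / (l.min + r * l.max) <= (1 + r) / r.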


@@ -5,7 +5,8 @@
 #' @export
 gmlm_tensor_normal <- function(X, F, sample.axis = length(dim(X)),
     max.iter = 100L, proj.betas = NULL, proj.Omegas = NULL, logger = NULL,
-    cond.threshold = 25, eps = 1e-6
+    reg.threshold = 25, reg.factor = 0.2,
+    eps = 1e-6
 ) {
     # Special case for `F` being vector valued, add dims of size 1
     if (is.null(dim(F))) {
@@ -25,13 +26,13 @@ gmlm_tensor_normal <- function(X, F, sample.axis = length(dim(X)),
     dimF <- head(dim(F), -1)
     modes <- seq_along(dimX)
 
-    # Ensure the Omega and beta projections lists are lists
-    if (!is.list(proj.Omegas)) {
-        proj.Omegas <- rep(NULL, length(modes))
-    }
-    if (!is.list(proj.betas)) {
-        proj.betas <- rep(NULL, length(modes))
-    }
+    # # Ensure the Omega and beta projections lists are lists
+    # if (!is.list(proj.Omegas)) {
+    #     proj.Omegas <- rep(NULL, length(modes))    # that's just NULL, which it already is
+    # }
+    # if (!is.list(proj.betas)) {
+    #     proj.betas <- rep(NULL, length(modes))
+    # }
 
     ### Phase 1: Computing initial values (`dim<-` ensures we have an "array")
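
Side note on the block commented out above: `rep(NULL, length(modes))` evaluates to plain `NULL`, since replicating a zero-length object yields a zero-length object, so neither branch ever produced a list and the check was a no-op, as the retained inline comment observes. Dropping it is also safe downstream because subsetting `NULL` returns `NULL`, so `proj.betas[[j]]` behaves identically either way. For reference:

rep(NULL, 3)        #> NULL
NULL[[2]]           #> NULL (subsetting NULL yields NULL)
vector("list", 3)   #> list(NULL, NULL, NULL), the list-of-NULLs the check seemed to aim for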
@@ -106,12 +107,13 @@ gmlm_tensor_normal <- function(X, F, sample.axis = length(dim(X)),
         # with regularization of the j'th mode covariance estimate `Sigma_j`
         for (j in seq_along(Sigmas)) {
             # Regularize Covariances
-            if (is.finite(cond.threshold)) {
+            if (is.finite(reg.threshold)) {
                 # Compute min and max eigenvalues
                 min_max <- range(eigen(Sigmas[[j]], TRUE, TRUE)$values)
-                # The condition is approximately `kappa(Sigmas[[j]]) > cond.threshold`
-                if (min_max[2] > cond.threshold * min_max[1]) {
-                    Sigmas[[j]] <- Sigmas[[j]] + diag(0.2 * min_max[2], nrow(Sigmas[[j]]))
+                # The condition is approximately `kappa(Sigmas[[j]]) > reg.threshold`
+                if (min_max[2] > reg.threshold * min_max[1]) {
+                    cat(sprintf("\033[2mReg (%d): cond(Sigma_%d) = %f\033[0m\n", iter, j, min_max[2] / min_max[1]))
+                    diag(Sigmas[[j]]) <- diag(Sigmas[[j]]) + reg.factor * min_max[2]
                 }
             }
             # Compute (unconstrained but regularized) Omega_j as covariance inverse
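
Note: the added `cat(sprintf(...))` line reports each regularization event together with the offending condition number; the `\033[2m` / `\033[0m` escapes render it as dim text on ANSI terminals so the events are visible without drowning out regular output. The new in-place diagonal update gives exactly the same matrix as the previous full-matrix form, just without allocating a scaled identity:

S <- matrix(c(2, 1, 1, 2), 2)    # toy SPD matrix with eigenvalues 3 and 1
shift <- 0.2 * max(eigen(S, symmetric = TRUE, only.values = TRUE)$values)
all.equal(`diag<-`(S, diag(S) + shift), S + diag(shift, nrow(S)))    #> TRUE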