add: reg.factor (regularization factor),

add: FFT to EEG sim
This commit is contained in:
Daniel Kapla 2025-11-05 17:21:33 +01:00
parent 491688378c
commit 82816e96a6
4 changed files with 51 additions and 56 deletions

View File

@ -52,10 +52,10 @@ c(X, y) %<-% readRDS("eeg_data_2d.rds")
#' #'
#' @param X 3D EEG data (preprocessed or not) #' @param X 3D EEG data (preprocessed or not)
#' @param F binary responce `y` as a 3D tensor, every obs. is a 1 x 1 matrix #' @param F binary responce `y` as a 3D tensor, every obs. is a 1 x 1 matrix
loo.predict.gmlm <- function(X, y) { loo.predict.gmlm <- function(X, y, ...) {
unlist(parallel::mclapply(seq_along(y), function(i) { unlist(parallel::mclapply(seq_along(y), function(i) {
# Fit with i'th observation removed # Fit with i'th observation removed
fit <- gmlm_tensor_normal(X[ , , -i], as.integer(y[-i]), sample.axis = 3L) fit <- gmlm_tensor_normal(X[ , , -i], as.integer(y[-i]), sample.axis = 3L, ...)
# Reduce the entire data set # Reduce the entire data set
r <- as.vector(mlm(X, fit$betas, modes = 1:2, transpose = TRUE)) r <- as.vector(mlm(X, fit$betas, modes = 1:2, transpose = TRUE))
@ -75,26 +75,35 @@ loo.predict.gmlm <- function(X, y) {
}, mc.cores = getOption("mc.cores", max(1L, parallel::detectCores() - 1L)))) }, mc.cores = getOption("mc.cores", max(1L, parallel::detectCores() - 1L))))
} }
proj.fft <- function(beta1, nr.freq = 5L) {
F <- fft(beta1)
Re(fft(`[<-`(F, head(order(abs(F)), -nr.freq), 0+0i), inverse = TRUE)) / length(F)
}
# perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction # perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction
y.hat.3.4 <- loo.predict.gmlm(preprocess(X, 3, 4), y) y.hat.3.4 <- loo.predict.gmlm(preprocess(X, 3, 4), y)
y.hat.15.15 <- loo.predict.gmlm(preprocess(X, 15, 15), y) y.hat.15.15 <- loo.predict.gmlm(preprocess(X, 15, 15), y)
y.hat.20.30 <- loo.predict.gmlm(preprocess(X, 20, 30), y) y.hat.20.30 <- loo.predict.gmlm(preprocess(X, 20, 30), y)
y.hat <- loo.predict.gmlm(X, y) y.hat <- loo.predict.gmlm(X, y)
y.hat.fft <- loo.predict.gmlm(X, y, proj.betas = list(proj.fft, NULL))
# classification performance measures table by leave-one-out cross-validation # classification performance measures table by leave-one-out cross-validation
(loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat), 2, function(y.pred) { (loo.cv <- apply(
cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat, y.hat.fft), 2,
function(y.pred) {
sapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"), sapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"),
function(FUN) { match.fun(FUN)(as.integer(y) - 1L, y.pred) }) function(FUN) { match.fun(FUN)(as.integer(y) - 1L, y.pred) })
})) }
#> y.hat.3.4 y.hat.15.15 y.hat.20.30 y.hat ))
#> acc 0.79508197 0.78688525 0.78688525 0.78688525 #> y.hat.3.4 y.hat.15.15 y.hat.20.30 y.hat y.hat.fft
#> err 0.20491803 0.21311475 0.21311475 0.21311475 #> acc 0.79508197 0.78688525 0.78688525 0.78688525 0.81147541
#> fpr 0.35555556 0.40000000 0.40000000 0.40000000 #> err 0.20491803 0.21311475 0.21311475 0.21311475 0.18852459
#> tpr 0.88311688 0.89610390 0.89610390 0.89610390 #> fpr 0.35555556 0.40000000 0.40000000 0.40000000 0.33333333
#> fnr 0.11688312 0.10389610 0.10389610 0.10389610 #> tpr 0.88311688 0.89610390 0.89610390 0.89610390 0.89610390
#> tnr 0.64444444 0.60000000 0.60000000 0.60000000 #> fnr 0.11688312 0.10389610 0.10389610 0.10389610 0.10389610
#> auc 0.85108225 0.83838384 0.83924964 0.83896104 #> tnr 0.64444444 0.60000000 0.60000000 0.60000000 0.66666667
#> auc.sd 0.03584791 0.03760531 0.03751307 0.03754553 #> auc 0.85194805 0.83838384 0.83924964 0.83896104 0.84646465
#> auc.sd 0.03574475 0.03760531 0.03751754 0.03754553 0.03751864
################################## Tensor SIR ################################## ################################## Tensor SIR ##################################

View File

@ -42,29 +42,6 @@ auc.sd <- function(y.true, y.pred) {
sqrt(pROC::var(pROC::roc(y.true, y.pred, quiet = TRUE, direction = "<"))) sqrt(pROC::var(pROC::roc(y.true, y.pred, quiet = TRUE, direction = "<")))
} }
# # unified API for all reduction procedures
# GMLM <- list(
# fit = function(X, y) tensorPredictors::gmlm_tensor_normal(X, as.integer(y), sample.axis = 4L),
# reduce = function(X, fit) mlm(X, fit$betas, 1:3, TRUE),
# applicable = function(X) TRUE
# )
# TSIR <- list(
# fit = function(X, y) tensorPredictors::TSIR(X, y, c(1L, 1L, 1L), sample.axis = 4L),
# reduce = function(X, fit) mlm(X, fit, 1:3, TRUE),
# applicable = function(X) TRUE
# )
# KPIR_LS <- list(
# fit = function(X, y) {
# if (any(dim(X)[-4] > dim(X)[4])) {
# stop("Dimensions too big")
# }
# tensorPredictors::kpir.ls(X, as.integer(y), sample.axis = 4L)
# },
# reduce = function(X, fit) if (is.null(fit)) NA else mlm(X, fit$alphas, 1:3, TRUE),
# applicable = function(X) all(dim(X)[1:3] <= dim(X)[4])
# )
#' Leave-one-out prediction using TSIR #' Leave-one-out prediction using TSIR
#' #'
#' @param method reduction method to be applied #' @param method reduction method to be applied
@ -105,14 +82,20 @@ c(X, y) %<-% readRDS("eeg_data_3d.rds")
##################################### GMLM ##################################### ##################################### GMLM #####################################
proj.fft <- function(beta1, nr.freq = 5L) {
F <- fft(beta1)
Re(fft(`[<-`(F, head(order(abs(F)), -nr.freq), 0+0i), inverse = TRUE)) / length(F)
}
# perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction # perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction
y.hat.3.4 <- loo.predict(gmlm_tensor_normal, preprocess(X, 3, 4, 3), y) y.hat.3.4 <- loo.predict(gmlm_tensor_normal, preprocess(X, 3, 4, 3), y)
y.hat.15.15 <- loo.predict(gmlm_tensor_normal, preprocess(X, 15, 15, 3), y) y.hat.15.15 <- loo.predict(gmlm_tensor_normal, preprocess(X, 15, 15, 3), y)
y.hat.20.30 <- loo.predict(gmlm_tensor_normal, preprocess(X, 20, 30, 3), y) y.hat.20.30 <- loo.predict(gmlm_tensor_normal, preprocess(X, 20, 30, 3), y)
y.hat <- loo.predict(gmlm_tensor_normal, X, y) y.hat <- loo.predict(gmlm_tensor_normal, X, y)
y.hat.fft <- loo.predict(gmlm_tensor_normal, X, y, proj.betas = list(proj.fft, NULL, NULL))
# classification performance measures table by leave-one-out cross-validation # classification performance measures table by leave-one-out cross-validation
(loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat), 2, function(y.pred) { (loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat, y.hat.fft), 2, function(y.pred) {
sapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"), sapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"),
function(FUN) { match.fun(FUN)(as.integer(y) - 1L, y.pred) }) function(FUN) { match.fun(FUN)(as.integer(y) - 1L, y.pred) })
})) }))

View File

@ -6,7 +6,8 @@ TSIR <- function(X, y,
sample.axis = 1L, sample.axis = 1L,
nr.slices = 10L, # default slices, ignored if y is a factor or integer nr.slices = 10L, # default slices, ignored if y is a factor or integer
max.iter = 50L, max.iter = 50L,
cond.threshold = Inf, eps = sqrt(.Machine$double.eps), reg.threshold = Inf, reg.factor = 0.2,
eps = 1e-6,
slice.method = c("cut", "ecdf"), # ignored if y is a factor or integer slice.method = c("cut", "ecdf"), # ignored if y is a factor or integer
logger = NULL logger = NULL
) { ) {
@ -122,13 +123,13 @@ TSIR <- function(X, y,
Omegas <- Map(function(k) prod(dim(X)[-k])^-1 * mcrossprod(X, mode = k), modes) Omegas <- Map(function(k) prod(dim(X)[-k])^-1 * mcrossprod(X, mode = k), modes)
# Check condition of sample covariances and perform regularization if needed # Check condition of sample covariances and perform regularization if needed
if (is.finite(cond.threshold)) { if (is.finite(reg.threshold)) {
for (j in seq_along(Omegas)) { for (j in seq_along(Omegas)) {
# Compute min and max eigen values # Compute min and max eigen values
min_max <- range(eigen(Omegas[[j]], TRUE, TRUE)$values) min_max <- range(eigen(Omegas[[j]], TRUE, TRUE)$values)
# The condition is approximately `kappa(Omegas[[j]]) > cond.threshold` # The condition is approximately `kappa(Omegas[[j]]) > reg.threshold`
if (min_max[2] > cond.threshold * min_max[1]) { if (min_max[2] > reg.threshold * min_max[1]) {
Omegas[[j]] <- Omegas[[j]] + diag(0.2 * min_max[2], nrow(Omegas[[j]])) diag(Omegas[[j]]) <- diag(Omegas[[j]]) + reg.factor * min_max[2]
} }
} }
} }

View File

@ -5,7 +5,8 @@
#' @export #' @export
gmlm_tensor_normal <- function(X, F, sample.axis = length(dim(X)), gmlm_tensor_normal <- function(X, F, sample.axis = length(dim(X)),
max.iter = 100L, proj.betas = NULL, proj.Omegas = NULL, logger = NULL, max.iter = 100L, proj.betas = NULL, proj.Omegas = NULL, logger = NULL,
cond.threshold = 25, eps = 1e-6 reg.threshold = 25, reg.factor = 0.2,
eps = 1e-6
) { ) {
# Special case for `F` being vector valued, add dims of size 1 # Special case for `F` being vector valued, add dims of size 1
if (is.null(dim(F))) { if (is.null(dim(F))) {
@ -25,13 +26,13 @@ gmlm_tensor_normal <- function(X, F, sample.axis = length(dim(X)),
dimF <- head(dim(F), -1) dimF <- head(dim(F), -1)
modes <- seq_along(dimX) modes <- seq_along(dimX)
# Ensure the Omega and beta projections lists are lists # # Ensure the Omega and beta projections lists are lists
if (!is.list(proj.Omegas)) { # if (!is.list(proj.Omegas)) {
proj.Omegas <- rep(NULL, length(modes)) # proj.Omegas <- rep(NULL, length(modes)) # thats just NULL, which it already is
} # }
if (!is.list(proj.betas)) { # if (!is.list(proj.betas)) {
proj.betas <- rep(NULL, length(modes)) # proj.betas <- rep(NULL, length(modes))
} # }
### Phase 1: Computing initial values (`dim<-` ensures we have an "array") ### Phase 1: Computing initial values (`dim<-` ensures we have an "array")
@ -106,12 +107,13 @@ gmlm_tensor_normal <- function(X, F, sample.axis = length(dim(X)),
# with regularization of the j'th mode covariance estimate `Sigma_j` # with regularization of the j'th mode covariance estimate `Sigma_j`
for (j in seq_along(Sigmas)) { for (j in seq_along(Sigmas)) {
# Regularize Covariances # Regularize Covariances
if (is.finite(cond.threshold)) { if (is.finite(reg.threshold)) {
# Compute min and max eigen values # Compute min and max eigen values
min_max <- range(eigen(Sigmas[[j]], TRUE, TRUE)$values) min_max <- range(eigen(Sigmas[[j]], TRUE, TRUE)$values)
# The condition is approximately `kappa(Sigmas[[j]]) > cond.threshold` # The condition is approximately `kappa(Sigmas[[j]]) > reg.threshold`
if (min_max[2] > cond.threshold * min_max[1]) { if (min_max[2] > reg.threshold * min_max[1]) {
Sigmas[[j]] <- Sigmas[[j]] + diag(0.2 * min_max[2], nrow(Sigmas[[j]])) cat(sprintf("\033[2mReg (%d): cond(Sigma_%d) = %f\033[0m\n", iter, j, min_max[2] / min_max[1]))
diag(Sigmas[[j]]) <- diag(Sigmas[[j]]) + reg.factor * min_max[2]
} }
} }
# Compute (unconstraint but regularized) Omega_j as covariance inverse # Compute (unconstraint but regularized) Omega_j as covariance inverse