diff --git a/dataAnalysis/eeg/02_eeg_2d.R b/dataAnalysis/eeg/02_eeg_2d.R index e2d7ae7..eb901db 100644 --- a/dataAnalysis/eeg/02_eeg_2d.R +++ b/dataAnalysis/eeg/02_eeg_2d.R @@ -52,10 +52,10 @@ c(X, y) %<-% readRDS("eeg_data_2d.rds") #' #' @param X 3D EEG data (preprocessed or not) #' @param F binary responce `y` as a 3D tensor, every obs. is a 1 x 1 matrix -loo.predict.gmlm <- function(X, y) { +loo.predict.gmlm <- function(X, y, ...) { unlist(parallel::mclapply(seq_along(y), function(i) { # Fit with i'th observation removed - fit <- gmlm_tensor_normal(X[ , , -i], as.integer(y[-i]), sample.axis = 3L) + fit <- gmlm_tensor_normal(X[ , , -i], as.integer(y[-i]), sample.axis = 3L, ...) # Reduce the entire data set r <- as.vector(mlm(X, fit$betas, modes = 1:2, transpose = TRUE)) @@ -75,26 +75,35 @@ loo.predict.gmlm <- function(X, y) { }, mc.cores = getOption("mc.cores", max(1L, parallel::detectCores() - 1L)))) } +proj.fft <- function(beta1, nr.freq = 5L) { + F <- fft(beta1) + Re(fft(`[<-`(F, head(order(abs(F)), -nr.freq), 0+0i), inverse = TRUE)) / length(F) +} + # perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction y.hat.3.4 <- loo.predict.gmlm(preprocess(X, 3, 4), y) y.hat.15.15 <- loo.predict.gmlm(preprocess(X, 15, 15), y) y.hat.20.30 <- loo.predict.gmlm(preprocess(X, 20, 30), y) y.hat <- loo.predict.gmlm(X, y) +y.hat.fft <- loo.predict.gmlm(X, y, proj.betas = list(proj.fft, NULL)) # classification performance measures table by leave-one-out cross-validation -(loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat), 2, function(y.pred) { - sapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"), - function(FUN) { match.fun(FUN)(as.integer(y) - 1L, y.pred) }) -})) -#> y.hat.3.4 y.hat.15.15 y.hat.20.30 y.hat -#> acc 0.79508197 0.78688525 0.78688525 0.78688525 -#> err 0.20491803 0.21311475 0.21311475 0.21311475 -#> fpr 0.35555556 0.40000000 0.40000000 0.40000000 -#> tpr 0.88311688 0.89610390 0.89610390 0.89610390 -#> fnr 0.11688312 0.10389610 0.10389610 0.10389610 -#> tnr 0.64444444 0.60000000 0.60000000 0.60000000 -#> auc 0.85108225 0.83838384 0.83924964 0.83896104 -#> auc.sd 0.03584791 0.03760531 0.03751307 0.03754553 +(loo.cv <- apply( + cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat, y.hat.fft), 2, + function(y.pred) { + sapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"), + function(FUN) { match.fun(FUN)(as.integer(y) - 1L, y.pred) }) + } +)) +#> y.hat.3.4 y.hat.15.15 y.hat.20.30 y.hat y.hat.fft +#> acc 0.79508197 0.78688525 0.78688525 0.78688525 0.81147541 +#> err 0.20491803 0.21311475 0.21311475 0.21311475 0.18852459 +#> fpr 0.35555556 0.40000000 0.40000000 0.40000000 0.33333333 +#> tpr 0.88311688 0.89610390 0.89610390 0.89610390 0.89610390 +#> fnr 0.11688312 0.10389610 0.10389610 0.10389610 0.10389610 +#> tnr 0.64444444 0.60000000 0.60000000 0.60000000 0.66666667 +#> auc 0.85194805 0.83838384 0.83924964 0.83896104 0.84646465 +#> auc.sd 0.03574475 0.03760531 0.03751754 0.03754553 0.03751864 ################################## Tensor SIR ################################## diff --git a/dataAnalysis/eeg/03_eeg_3d.R b/dataAnalysis/eeg/03_eeg_3d.R index fe3709d..ff07e93 100644 --- a/dataAnalysis/eeg/03_eeg_3d.R +++ b/dataAnalysis/eeg/03_eeg_3d.R @@ -42,29 +42,6 @@ auc.sd <- function(y.true, y.pred) { sqrt(pROC::var(pROC::roc(y.true, y.pred, quiet = TRUE, direction = "<"))) } - -# # unified API for all reduction procedures -# GMLM <- list( -# fit = function(X, y) tensorPredictors::gmlm_tensor_normal(X, as.integer(y), sample.axis = 4L), -# reduce = function(X, fit) mlm(X, fit$betas, 1:3, TRUE), -# applicable = function(X) TRUE -# ) -# TSIR <- list( -# fit = function(X, y) tensorPredictors::TSIR(X, y, c(1L, 1L, 1L), sample.axis = 4L), -# reduce = function(X, fit) mlm(X, fit, 1:3, TRUE), -# applicable = function(X) TRUE -# ) -# KPIR_LS <- list( -# fit = function(X, y) { -# if (any(dim(X)[-4] > dim(X)[4])) { -# stop("Dimensions too big") -# } -# tensorPredictors::kpir.ls(X, as.integer(y), sample.axis = 4L) -# }, -# reduce = function(X, fit) if (is.null(fit)) NA else mlm(X, fit$alphas, 1:3, TRUE), -# applicable = function(X) all(dim(X)[1:3] <= dim(X)[4]) -# ) - #' Leave-one-out prediction using TSIR #' #' @param method reduction method to be applied @@ -105,14 +82,20 @@ c(X, y) %<-% readRDS("eeg_data_3d.rds") ##################################### GMLM ##################################### +proj.fft <- function(beta1, nr.freq = 5L) { + F <- fft(beta1) + Re(fft(`[<-`(F, head(order(abs(F)), -nr.freq), 0+0i), inverse = TRUE)) / length(F) +} + # perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction y.hat.3.4 <- loo.predict(gmlm_tensor_normal, preprocess(X, 3, 4, 3), y) y.hat.15.15 <- loo.predict(gmlm_tensor_normal, preprocess(X, 15, 15, 3), y) y.hat.20.30 <- loo.predict(gmlm_tensor_normal, preprocess(X, 20, 30, 3), y) y.hat <- loo.predict(gmlm_tensor_normal, X, y) +y.hat.fft <- loo.predict(gmlm_tensor_normal, X, y, proj.betas = list(proj.fft, NULL, NULL)) # classification performance measures table by leave-one-out cross-validation -(loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat), 2, function(y.pred) { +(loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat, y.hat.fft), 2, function(y.pred) { sapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"), function(FUN) { match.fun(FUN)(as.integer(y) - 1L, y.pred) }) })) diff --git a/tensorPredictors/R/TSIR.R b/tensorPredictors/R/TSIR.R index 4ee49a8..fe0ec2f 100644 --- a/tensorPredictors/R/TSIR.R +++ b/tensorPredictors/R/TSIR.R @@ -6,7 +6,8 @@ TSIR <- function(X, y, sample.axis = 1L, nr.slices = 10L, # default slices, ignored if y is a factor or integer max.iter = 50L, - cond.threshold = Inf, eps = sqrt(.Machine$double.eps), + reg.threshold = Inf, reg.factor = 0.2, + eps = 1e-6, slice.method = c("cut", "ecdf"), # ignored if y is a factor or integer logger = NULL ) { @@ -122,13 +123,13 @@ TSIR <- function(X, y, Omegas <- Map(function(k) prod(dim(X)[-k])^-1 * mcrossprod(X, mode = k), modes) # Check condition of sample covariances and perform regularization if needed - if (is.finite(cond.threshold)) { + if (is.finite(reg.threshold)) { for (j in seq_along(Omegas)) { # Compute min and max eigen values min_max <- range(eigen(Omegas[[j]], TRUE, TRUE)$values) - # The condition is approximately `kappa(Omegas[[j]]) > cond.threshold` - if (min_max[2] > cond.threshold * min_max[1]) { - Omegas[[j]] <- Omegas[[j]] + diag(0.2 * min_max[2], nrow(Omegas[[j]])) + # The condition is approximately `kappa(Omegas[[j]]) > reg.threshold` + if (min_max[2] > reg.threshold * min_max[1]) { + diag(Omegas[[j]]) <- diag(Omegas[[j]]) + reg.factor * min_max[2] } } } diff --git a/tensorPredictors/R/gmlm_tensor_normal.R b/tensorPredictors/R/gmlm_tensor_normal.R index da7c4ac..db155c3 100644 --- a/tensorPredictors/R/gmlm_tensor_normal.R +++ b/tensorPredictors/R/gmlm_tensor_normal.R @@ -5,7 +5,8 @@ #' @export gmlm_tensor_normal <- function(X, F, sample.axis = length(dim(X)), max.iter = 100L, proj.betas = NULL, proj.Omegas = NULL, logger = NULL, - cond.threshold = 25, eps = 1e-6 + reg.threshold = 25, reg.factor = 0.2, + eps = 1e-6 ) { # Special case for `F` being vector valued, add dims of size 1 if (is.null(dim(F))) { @@ -25,13 +26,13 @@ gmlm_tensor_normal <- function(X, F, sample.axis = length(dim(X)), dimF <- head(dim(F), -1) modes <- seq_along(dimX) - # Ensure the Omega and beta projections lists are lists - if (!is.list(proj.Omegas)) { - proj.Omegas <- rep(NULL, length(modes)) - } - if (!is.list(proj.betas)) { - proj.betas <- rep(NULL, length(modes)) - } + # # Ensure the Omega and beta projections lists are lists + # if (!is.list(proj.Omegas)) { + # proj.Omegas <- rep(NULL, length(modes)) # thats just NULL, which it already is + # } + # if (!is.list(proj.betas)) { + # proj.betas <- rep(NULL, length(modes)) + # } ### Phase 1: Computing initial values (`dim<-` ensures we have an "array") @@ -106,12 +107,13 @@ gmlm_tensor_normal <- function(X, F, sample.axis = length(dim(X)), # with regularization of the j'th mode covariance estimate `Sigma_j` for (j in seq_along(Sigmas)) { # Regularize Covariances - if (is.finite(cond.threshold)) { + if (is.finite(reg.threshold)) { # Compute min and max eigen values min_max <- range(eigen(Sigmas[[j]], TRUE, TRUE)$values) - # The condition is approximately `kappa(Sigmas[[j]]) > cond.threshold` - if (min_max[2] > cond.threshold * min_max[1]) { - Sigmas[[j]] <- Sigmas[[j]] + diag(0.2 * min_max[2], nrow(Sigmas[[j]])) + # The condition is approximately `kappa(Sigmas[[j]]) > reg.threshold` + if (min_max[2] > reg.threshold * min_max[1]) { + cat(sprintf("\033[2mReg (%d): cond(Sigma_%d) = %f\033[0m\n", iter, j, min_max[2] / min_max[1])) + diag(Sigmas[[j]]) <- diag(Sigmas[[j]]) + reg.factor * min_max[2] } } # Compute (unconstraint but regularized) Omega_j as covariance inverse