From e2e8d19a0a420d4574401aba11ab89b85f490871 Mon Sep 17 00:00:00 2001 From: daniel Date: Fri, 2 May 2025 17:02:20 +0200 Subject: [PATCH] wip: interface standardization and fixing bugs --- dataAnalysis/eeg/02_eeg_2d.R | 173 +++++++++++++++++++++---------- dataAnalysis/eeg/03_eeg_3d.R | 140 ++++++++++++++----------- tensorPredictors/R/LSIR.R | 12 +-- tensorPredictors/R/TSIR.R | 2 +- tensorPredictors/R/rtensornorm.R | 4 +- 5 files changed, 209 insertions(+), 122 deletions(-) diff --git a/dataAnalysis/eeg/02_eeg_2d.R b/dataAnalysis/eeg/02_eeg_2d.R index a529f4f..e2d7ae7 100644 --- a/dataAnalysis/eeg/02_eeg_2d.R +++ b/dataAnalysis/eeg/02_eeg_2d.R @@ -1,29 +1,6 @@ library(tensorPredictors) - -# Load as 3D predictors `X` and flat response `y` and `F = y` with per person dim. 1 x 1 -c(X, F, y) %<-% local({ - # Load from file - ds <- readRDS("eeg_data.rds") - - # Dimension values - n <- nrow(ds) # sample size (nr. of people) - p <- 64L # nr. of predictors (count of sensorce) - t <- 256L # nr. of time points (measurements) - - # Extract dimension names - nNames <- ds$PersonID - tNames <- as.character(seq(t)) - pNames <- unlist(strsplit(colnames(ds)[2 + t * seq(p)], "_"))[c(TRUE, FALSE)] - - # Split into predictors (with proper dims and names) and response - X <- array(as.matrix(ds[, -(1:2)]), - dim = c(person = n, time = t, sensor = p), - dimnames = list(person = nNames, time = tNames, sensor = pNames) - ) - y <- ds$Case_Control - - list(X, array(y, c(n, 1L, 1L)), y) -}) +library(parallel) +library(pROC) #' (2D)^2 PCA preprocessing @@ -32,14 +9,14 @@ c(X, F, y) %<-% local({ #' @param ppc Number of "p"redictor "p"rincipal "c"omponents. preprocess <- function(X, tpc, ppc) { # Mode covariances (for predictor and time point modes) - c(Sigma_t, Sigma_p) %<-% mcov(X, sample.axis = 1L) + c(Sigma_t, Sigma_p) %<-% mcov(X, sample.axis = 3L) # "predictor" (sensor) and time point principal components V_t <- svd(Sigma_t, tpc, 0L)$u V_p <- svd(Sigma_p, ppc, 0L)$u # reduce with mode wise PCs - mlm(X, list(V_t, V_p), modes = 2:3, transposed = TRUE) + mlm(X, list(V_t, V_p), modes = 1:2, transposed = TRUE) } @@ -57,57 +34,57 @@ fnr <- function(y.true, y.pred) mean((round(y.pred) == 0)[y.true == 1]) # tnr: True negative rate. P(Yhat = - | Y = -). 
 tnr <- function(y.true, y.pred) mean((round(y.pred) == 0)[y.true == 0])
 # auc: Area Under the Curve
-auc <- function(y.true, y.pred) as.numeric(pROC::roc(y.true, y.pred, quiet = TRUE)$auc)
-auc.sd <- function(y.true, y.pred) sqrt(pROC::var(pROC::roc(y.true, y.pred, quiet = TRUE)))
+auc <- function(y.true, y.pred) {
+    as.numeric(pROC::roc(y.true, y.pred, quiet = TRUE, direction = "<")$auc)
+}
+auc.sd <- function(y.true, y.pred) {
+    sqrt(pROC::var(pROC::roc(y.true, y.pred, quiet = TRUE, direction = "<")))
+}
+
+
+# Load 2D (S1 stimulus only) EEG dataset of all subjects
+c(X, y) %<-% readRDS("eeg_data_2d.rds")
 
 
 ##################################### GMLM #####################################
 
-# fit a tensor normal model to the data sample axis 1 indexes persons)
-fit.gmlm <- gmlm_tensor_normal(X, F, sample.axis = 1L)
-
-# plot the fitted mode wise reductions (for time and sensor axis)
-with(fit.gmlm, {
-    par.reset <- par(mfrow = c(2, 1))
-    plot(seq(0, 1, len = 256), betas[[1]], main = "Time", xlab = "Time [s]", ylab = expression(beta[1]))
-    plot(betas[[2]], main = "Sensors", xlab = "Sensor Index", ylab = expression(beta[2]))
-    par(par.reset)
-})
-
 #' Leave-one-out prediction using GMLM
 #'
 #' @param X 3D EEG data (preprocessed or not)
-#' @param F binary responce `y` as a 3D tensor, every obs. is a 1 x 1 matrix
-loo.predict.gmlm <- function(X, F) {
-    unlist(parallel::mclapply(seq_len(dim(X)[1L]), function(i) {
+#' @param y binary response vector
+loo.predict.gmlm <- function(X, y) {
+    unlist(parallel::mclapply(seq_along(y), function(i) {
         # Fit with i'th observation removed
-        fit <- gmlm_tensor_normal(X[-i, , ], F[-i, , , drop = FALSE], sample.axis = 1L)
+        fit <- gmlm_tensor_normal(X[ , , -i], as.integer(y[-i]), sample.axis = 3L)
 
         # Reduce the entire data set
-        r <- as.vector(mlm(X, fit$betas, modes = 2:3, transpose = TRUE))
+        r <- as.vector(mlm(X, fit$betas, modes = 1:2, transpose = TRUE))
         # Fit a logit model on reduced data with i'th observation removed
         logit <- glm(y ~ r, family = binomial(link = "logit"),
             data = data.frame(y = y[-i], r = r[-i])
         )
         # predict i'th response given i'th reduced observation
         y.hat <- predict(logit, newdata = data.frame(r = r[i]), type = "response")
+
         # report progress
-        cat(sprintf("dim: (%d, %d) - %3d/%d\n", dim(X)[2L], dim(X)[3L], i, dim(X)[1L]))
+        cat(sprintf("dim: (%d, %d) - %3d/%d\n",
+            dim(X)[1L], dim(X)[2L], i, length(y)
+        ))
 
         y.hat
     }, mc.cores = getOption("mc.cores", max(1L, parallel::detectCores() - 1L))))
 }
 
 # perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction
-y.hat.3.4 <- loo.predict.gmlm(preprocess(X, 3, 4), F)
-y.hat.15.15 <- loo.predict.gmlm(preprocess(X, 15, 15), F)
-y.hat.20.30 <- loo.predict.gmlm(preprocess(X, 20, 30), F)
-y.hat <- loo.predict.gmlm(X, F)
+y.hat.3.4 <- loo.predict.gmlm(preprocess(X, 3, 4), y)
+y.hat.15.15 <- loo.predict.gmlm(preprocess(X, 15, 15), y)
+y.hat.20.30 <- loo.predict.gmlm(preprocess(X, 20, 30), y)
+y.hat <- loo.predict.gmlm(X, y)
 
 # classification performance measures table by leave-one-out cross-validation
 (loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat), 2, function(y.pred) {
     sapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"),
-        function(FUN) { match.fun(FUN)(y, y.pred) })
+        function(FUN) { match.fun(FUN)(as.integer(y) - 1L, y.pred) })
 }))
 #>         y.hat.3.4 y.hat.15.15 y.hat.20.30      y.hat
 #> acc    0.79508197  0.78688525  0.78688525 0.78688525
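For reference, a minimal single-pass (in-sample) version of the fit-reduce-classify
pipeline above. It is a sketch reusing only calls already present in this script,
not a replacement for the leave-one-out results; its in-sample AUC is optimistic
compared to the LOO numbers:

    library(tensorPredictors)
    library(pROC)
    c(X, y) %<-% readRDS("eeg_data_2d.rds")
    # fit on all observations, then reduce the whole data set
    fit <- gmlm_tensor_normal(X, as.integer(y), sample.axis = 3L)
    r <- as.vector(mlm(X, fit$betas, modes = 1:2, transpose = TRUE))
    # logit classifier on the scalar reduction
    logit <- glm(y ~ r, family = binomial(link = "logit"),
        data = data.frame(y = y, r = r))
    # in-sample AUC via the helper defined above
    auc(as.integer(y) - 1L, predict(logit, type = "response"))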
@@ -126,21 +103,28 @@ y.hat <- loo.predict.gmlm(X, F)
 #'
 #' @param X 3D EEG data (preprocessed or not)
-#' @param y binary responce vector
-loo.predict.tsir <- function(X, y) {
-    unlist(parallel::mclapply(seq_len(dim(X)[1L]), function(i) {
+#' @param y binary response vector
+#' @param cond.threshold (approx.) condition number threshold for regularizing the
+#'   mode-wise covariances `Cov(X_(j))`; `Inf` (the default) means "no regularization"
+loo.predict.tsir <- function(X, y, cond.threshold = Inf) {
+    unlist(parallel::mclapply(seq_along(y), function(i) {
         # Fit with i'th observation removed
-        fit <- TSIR(X[-i, , ], y[-i], c(1L, 1L), sample.axis = 1L)
+        fit <- TSIR(X[ , , -i], y[-i], sample.axis = 3L,
+            cond.threshold = cond.threshold
+        )
 
         # Reduce the entire data set
-        r <- as.vector(mlm(X, fit, modes = 2:3, transpose = TRUE))
+        r <- as.vector(mlm(X, fit$betas, modes = 1:2, transpose = TRUE))
         # Fit a logit model on reduced data with i'th observation removed
         logit <- glm(y ~ r, family = binomial(link = "logit"),
             data = data.frame(y = y[-i], r = r[-i])
         )
         # predict i'th response given i'th reduced observation
         y.hat <- predict(logit, newdata = data.frame(r = r[i]), type = "response")
+
         # report progress
-        cat(sprintf("dim: (%d, %d) - %3d/%d\n", dim(X)[2L], dim(X)[3L], i, dim(X)[1L]))
+        cat(sprintf("dim: (%d, %d) - %3d/%d\n",
+            dim(X)[1L], dim(X)[2L], i, length(y)
+        ))
 
         y.hat
     }, mc.cores = getOption("mc.cores", max(1L, parallel::detectCores() - 1L))))
 }
 
@@ -155,7 +137,7 @@ y.hat <- loo.predict.tsir(X, y)
 # classification performance measures table by leave-one-out cross-validation
 (loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat), 2, function(y.pred) {
     sapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"),
-        function(FUN) { match.fun(FUN)(y, y.pred) })
+        function(FUN) { match.fun(FUN)(as.integer(y) - 1L, y.pred) })
 }))
 #>         y.hat.3.4 y.hat.15.15 y.hat.20.30      y.hat
 #> acc    0.79508197  0.78688525   0.7540984 0.67213115
@@ -166,3 +150,79 @@ y.hat <- loo.predict.tsir(X, y)
 #> tnr    0.66666667  0.62222222   0.6222222 0.46666667
 #> auc    0.84646465  0.83376623   0.8040404 0.68946609
 #> auc.sd 0.03596227  0.04092069   0.0446129 0.05196611
+
+
+# perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction,
+# now with mode-wise covariance regularization via a condition number threshold,
+# similar to the regularization employed by the tensor-normal GMLM
+y.hat.3.4 <- loo.predict.tsir(preprocess(X, 3, 4), y, cond.threshold = 25)
+y.hat.15.15 <- loo.predict.tsir(preprocess(X, 15, 15), y, cond.threshold = 25)
+y.hat.20.30 <- loo.predict.tsir(preprocess(X, 20, 30), y, cond.threshold = 25)
+y.hat <- loo.predict.tsir(X, y, cond.threshold = 25)
+
+# classification performance measures table by leave-one-out cross-validation
+(loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat), 2, function(y.pred) {
+    sapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"),
+        function(FUN) { match.fun(FUN)(as.integer(y) - 1L, y.pred) })
+}))
+#>         y.hat.3.4 y.hat.15.15 y.hat.20.30      y.hat
+#> acc    0.79508197  0.78688525  0.78688525 0.78688525
+#> err    0.20491803  0.21311475  0.21311475 0.21311475
+#> fpr    0.33333333  0.40000000  0.40000000 0.40000000
+#> tpr    0.87012987  0.89610390  0.89610390 0.89610390
+#> fnr    0.12987013  0.10389610  0.10389610 0.10389610
+#> tnr    0.66666667  0.60000000  0.60000000 0.60000000
+#> auc    0.84646465  0.84329004  0.84444444 0.84386724
+#> auc.sd 0.03596227  0.03666439  0.03636842 0.03650638
+
+
+##################################### LSIR #####################################
+
+#' Leave-one-out prediction using LSIR
+#'
+#' @param X 3D EEG data (preprocessed or not)
+#' @param y binary response vector
+loo.predict.lsir <- function(X, y) { + unlist(parallel::mclapply(seq_along(y), function(i) { + # Fit with i'th observation removed + fit <- LSIR(X[ , , -i], y[-i], sample.axis = 3L) + + # Reduce the entire data set + r <- as.vector(mlm(X, fit$betas, modes = 1:2, transpose = TRUE)) + # Fit a logit model on reduced data with i'th observation removed + logit <- glm(y ~ r, family = binomial(link = "logit"), + data = data.frame(y = y[-i], r = r[-i]) + ) + # predict i'th response given i'th reduced observation + y.hat <- predict(logit, newdata = data.frame(r = r[i]), type = "response") + + # report progress + cat(sprintf("dim: (%d, %d) - %3d/%d\n", + dim(X)[1], dim(X)[2], i, length(y) + )) + + y.hat + }, mc.cores = getOption("mc.cores", max(1L, parallel::detectCores() - 1L)))) +} + +# perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction +y.hat.3.4 <- loo.predict.lsir(preprocess(X, 3, 4), y) +y.hat.15.15 <- loo.predict.lsir(preprocess(X, 15, 15), y) +y.hat.20.30 <- loo.predict.lsir(preprocess(X, 20, 30), y) +y.hat <- loo.predict.lsir(X, y) + + +# classification performance measures table by leave-one-out cross-validation +(loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat), 2, function(y.pred) { + sapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"), + function(FUN) { match.fun(FUN)(as.integer(y) - 1L, y.pred) }) +})) +#> y.hat.3.4 y.hat.15.15 y.hat.20.30 y.hat +#> acc 0.79508197 0.72131148 0.78688525 0.4918033 +#> err 0.20491803 0.27868852 0.21311475 0.5081967 +#> fpr 0.35555556 0.44444444 0.35555556 0.7333333 +#> tpr 0.88311688 0.81818182 0.87012987 0.6233766 +#> fnr 0.11688312 0.18181818 0.12987013 0.3766234 +#> tnr 0.64444444 0.55555556 0.64444444 0.2666667 +#> auc 0.84963925 0.81298701 0.83145743 0.3909091 +#> auc.sd 0.03639394 0.03998711 0.03815816 0.0540805 diff --git a/dataAnalysis/eeg/03_eeg_3d.R b/dataAnalysis/eeg/03_eeg_3d.R index 66dbe95..fe3709d 100644 --- a/dataAnalysis/eeg/03_eeg_3d.R +++ b/dataAnalysis/eeg/03_eeg_3d.R @@ -2,7 +2,7 @@ library(tensorPredictors) library(parallel) library(pROC) -#' Mode-Wise PCA preprocessing +#' Mode-Wise PCA preprocessing (generalized (2D)^2 PCA) #' #' @param npc_time Number of Principal Components for time axis #' @param npc_sensor Number of Principal Components for sensor axis @@ -35,24 +35,50 @@ fnr <- function(y.true, y.pred) mean((round(y.pred) == 0)[y.true == 1]) # tnr: True negative rate. P(Yhat = - | Y = -). 
 tnr <- function(y.true, y.pred) mean((round(y.pred) == 0)[y.true == 0])
 # auc: Area Under the Curve
-auc <- function(y.true, y.pred) as.numeric(pROC::roc(y.true, y.pred, quiet = TRUE)$auc)
-auc.sd <- function(y.true, y.pred) sqrt(pROC::var(pROC::roc(y.true, y.pred, quiet = TRUE)))
+auc <- function(y.true, y.pred) {
+    as.numeric(pROC::roc(y.true, y.pred, quiet = TRUE, direction = "<")$auc)
+}
+auc.sd <- function(y.true, y.pred) {
+    sqrt(pROC::var(pROC::roc(y.true, y.pred, quiet = TRUE, direction = "<")))
+}
 
-# Load full EEG dataset of all subjects
-c(X, y) %<-% readRDS("eeg_data_3d.rds")
+# # unified API for all reduction procedures
+# GMLM <- list(
+#     fit = function(X, y) tensorPredictors::gmlm_tensor_normal(X, as.integer(y), sample.axis = 4L),
+#     reduce = function(X, fit) mlm(X, fit$betas, 1:3, TRUE),
+#     applicable = function(X) TRUE
+# )
+# TSIR <- list(
+#     fit = function(X, y) tensorPredictors::TSIR(X, y, c(1L, 1L, 1L), sample.axis = 4L),
+#     reduce = function(X, fit) mlm(X, fit, 1:3, TRUE),
+#     applicable = function(X) TRUE
+# )
+# KPIR_LS <- list(
+#     fit = function(X, y) {
+#         if (any(dim(X)[-4] > dim(X)[4])) {
+#             stop("Dimensions too big")
+#         }
+#         tensorPredictors::kpir.ls(X, as.integer(y), sample.axis = 4L)
+#     },
+#     reduce = function(X, fit) if (is.null(fit)) NA else mlm(X, fit$alphas, 1:3, TRUE),
+#     applicable = function(X) all(dim(X)[1:3] <= dim(X)[4])
+# )
 
-
-##################################### GMLM #####################################
-
-#' Leave-one-out prediction using GMLM
+#' Generic leave-one-out prediction using a supplied reduction method
 #'
-#' @param X 3D EEG data (preprocessed or not)
-#' @param y binary responce `y` as a 3D tensor, every obs. is a 1 x 1 matrix
-loo.predict.gmlm <- function(X, y) {
+#' @param method reduction method to be applied
+#' @param X 4D EEG data (one 3D tensor per subject, preprocessed or not)
+#' @param y binary response vector
+#' @param ... additional arguments passed on to `method`
+loo.predict <- function(method, X, y, ...) {
+    # get method function name as character string for logging
+    method.name <- as.character(substitute(method))
+
+    # Parallel Leave-One-Out prediction
     unlist(parallel::mclapply(seq_along(y), function(i) {
         # Fit with i'th observation removed
-        fit <- gmlm_tensor_normal(X[ , , , -i], as.integer(y[-i]), sample.axis = 4L)
+        fit <- method(X[ , , , -i], y[-i], sample.axis = 4L, ...)
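+        # NOTE: the interface assumed here (a sketch of the contract, not
+        # checked at runtime) is that `method` accepts (X, y, sample.axis, ...)
+        # and returns a list whose `betas` component holds the mode-wise
+        # reduction matrices, as gmlm_tensor_normal, TSIR and LSIR do after
+        # this change.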
# Reduce the entire data set r <- as.vector(mlm(X, fit$betas, modes = 1:3, transpose = TRUE)) @@ -64,8 +90,8 @@ loo.predict.gmlm <- function(X, y) { y.hat <- predict(logit, newdata = data.frame(r = r[i]), type = "response") # report progress - cat(sprintf("dim: (%d, %d, %d) - %3d/%d\n", - dim(X)[1], dim(X)[2], dim(X)[3], i, length(y) + cat(sprintf("%s - dim: (%d, %d, %d) - %3d/%d\n", + method.name, dim(X)[1], dim(X)[2], dim(X)[3], i, length(y) )) y.hat @@ -73,65 +99,41 @@ loo.predict.gmlm <- function(X, y) { } -# perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction -y.hat.3.4 <- loo.predict.gmlm(preprocess(X, 3, 4, 3), y) -y.hat.15.15 <- loo.predict.gmlm(preprocess(X, 15, 15, 3), y) -y.hat.20.30 <- loo.predict.gmlm(preprocess(X, 20, 30, 3), y) -y.hat <- loo.predict.gmlm(X, y) +# Load full EEG dataset (3D tensor for each subject) +c(X, y) %<-% readRDS("eeg_data_3d.rds") +##################################### GMLM ##################################### + +# perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction +y.hat.3.4 <- loo.predict(gmlm_tensor_normal, preprocess(X, 3, 4, 3), y) +y.hat.15.15 <- loo.predict(gmlm_tensor_normal, preprocess(X, 15, 15, 3), y) +y.hat.20.30 <- loo.predict(gmlm_tensor_normal, preprocess(X, 20, 30, 3), y) +y.hat <- loo.predict(gmlm_tensor_normal, X, y) + # classification performance measures table by leave-one-out cross-validation (loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat), 2, function(y.pred) { sapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"), function(FUN) { match.fun(FUN)(as.integer(y) - 1L, y.pred) }) })) #> y.hat.3.4 y.hat.15.15 y.hat.20.30 y.hat -#> acc 0.83606557 0.80327869 0.80327869 0.80327869 -#> err 0.16393443 0.19672131 0.19672131 0.19672131 -#> fpr 0.31111111 0.33333333 0.33333333 0.33333333 +#> acc 0.83606557 0.80327869 0.80327869 0.79508197 +#> err 0.16393443 0.19672131 0.19672131 0.20491803 +#> fpr 0.31111111 0.33333333 0.33333333 0.35555556 #> tpr 0.92207792 0.88311688 0.88311688 0.88311688 #> fnr 0.07792208 0.11688312 0.11688312 0.11688312 -#> tnr 0.68888889 0.66666667 0.66666667 0.66666667 -#> auc 0.88023088 0.87070707 0.87041847 0.86810967 -#> auc.sd 0.03124875 0.03244623 0.03248653 0.03295883 +#> tnr 0.68888889 0.66666667 0.66666667 0.64444444 +#> auc 0.88051948 0.86984127 0.86926407 0.86810967 +#> auc.sd 0.03118211 0.03254642 0.03259186 0.03295883 ################################## Tensor SIR ################################## -#' Leave-one-out prediction using TSIR -#' -#' @param X 3D EEG data (preprocessed or not) -#' @param y binary responce vector -loo.predict.tsir <- function(X, y) { - unlist(parallel::mclapply(seq_along(y), function(i) { - # Fit with i'th observation removed - fit <- TSIR(X[ , , , -i], y[-i], c(1L, 1L, 1L), sample.axis = 4L) - - # Reduce the entire data set - r <- as.vector(mlm(X, fit, modes = 1:3, transpose = TRUE)) - # Fit a logit model on reduced data with i'th observation removed - logit <- glm(y ~ r, family = binomial(link = "logit"), - data = data.frame(y = y[-i], r = r[-i]) - ) - # predict i'th response given i'th reduced observation - y.hat <- predict(logit, newdata = data.frame(r = r[i]), type = "response") - - # report progress - cat(sprintf("dim: (%d, %d, %d) - %3d/%d\n", - dim(X)[1], dim(X)[2], dim(X)[3], i, length(y) - )) - - y.hat - }, mc.cores = getOption("mc.cores", max(1L, parallel::detectCores() - 1L)))) -} - - # perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction -y.hat.3.4 <- 
loo.predict.tsir(preprocess(X, 3, 4, 3), y)
-y.hat.15.15 <- loo.predict.tsir(preprocess(X, 15, 15, 3), y)
-y.hat.20.30 <- loo.predict.tsir(preprocess(X, 20, 30, 3), y)
-y.hat <- loo.predict.tsir(X, y)
-
+y.hat.3.4 <- loo.predict(TSIR, preprocess(X, 3, 4, 3), y)
+y.hat.15.15 <- loo.predict(TSIR, preprocess(X, 15, 15, 3), y)
+y.hat.20.30 <- loo.predict(TSIR, preprocess(X, 20, 30, 3), y)
+y.hat <- loo.predict(TSIR, X, y)
 
 # classification performance measures table by leave-one-out cross-validation
 (loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat), 2, function(y.pred) {
@@ -147,3 +149,26 @@ y.hat <- loo.predict.tsir(X, y)
 #> tnr    0.66666667  0.75555556  0.66666667 0.66666667
 #> auc    0.86522367  0.89379509  0.88196248 0.85974026
 #> auc.sd 0.03357539  0.03055047  0.02986038 0.03367847
+
+
+# perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction,
+# now with mode-wise covariance regularization (condition number threshold 25)
+y.hat.3.4 <- loo.predict(TSIR, preprocess(X, 3, 4, 3), y, cond.threshold = 25)
+y.hat.15.15 <- loo.predict(TSIR, preprocess(X, 15, 15, 3), y, cond.threshold = 25)
+y.hat.20.30 <- loo.predict(TSIR, preprocess(X, 20, 30, 3), y, cond.threshold = 25)
+y.hat <- loo.predict(TSIR, X, y, cond.threshold = 25)
+
+# classification performance measures table by leave-one-out cross-validation
+(loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat), 2, function(y.pred) {
+    sapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"),
+        function(FUN) { match.fun(FUN)(as.integer(y) - 1L, y.pred) })
+}))
+#>         y.hat.3.4 y.hat.15.15 y.hat.20.30      y.hat
+#> acc    0.81967213  0.77049180  0.76229508 0.77049180
+#> err    0.18032787  0.22950820  0.23770492 0.22950820
+#> fpr    0.33333333  0.37777778  0.40000000 0.37777778
+#> tpr    0.90909091  0.85714286  0.85714286 0.85714286
+#> fnr    0.09090909  0.14285714  0.14285714 0.14285714
+#> tnr    0.66666667  0.62222222  0.60000000 0.62222222
+#> auc    0.86522367  0.84386724  0.84415584 0.84040404
+#> auc.sd 0.03357539  0.03542706  0.03519592 0.03558135
diff --git a/tensorPredictors/R/LSIR.R b/tensorPredictors/R/LSIR.R
index 8c4e1b9..2adabae 100644
--- a/tensorPredictors/R/LSIR.R
+++ b/tensorPredictors/R/LSIR.R
@@ -40,20 +40,20 @@ LSIR <- function(X, y,
     modes <- seq_along(dim(X))[-1L]
     n <- dim(X)[1L]
 
-    sigmas <- lapply(seq_along(modes), function(i) {
+    Sigmas <- lapply(seq_along(modes), function(i) {
         matrix(rowMeans(apply(X, modes[-i], cov)), dim(X)[modes[i]])
     })
-    # Omega_i = Sigma_i^{-1 / 2}
-    isqrt_sigmas <- Map(matpow, sigmas, -1 / 2)
+    # Sigma_i^{-1 / 2}, inverse square roots of the mode-wise covariances
+    isqrt_Sigmas <- Map(matpow, Sigmas, -1 / 2)
 
     # Normalize observations
-    Z <- mlm(X - rep(colMeans(X), each = dim(X)[1L]), isqrt_sigmas, modes = modes)
+    Z <- mlm(X - rep(colMeans(X), each = dim(X)[1L]), isqrt_Sigmas, modes = modes)
 
     # Estimate conditional covariances Omega = Cov(E[Z | Y])
     slice.args <- c(
         list(Z), rep(alist(, )[1], length(dim(X))), list(drop = FALSE)
     )
-    omegas <- lapply(seq_along(modes), function(i) {
+    Omegas <- lapply(seq_along(modes), function(i) {
         matrix(Reduce(`+`, lapply(levels(y), function(l) {
             slice.args[[2]] <- y == l
             rowMeans(apply(do.call(`[`, slice.args), modes[-i], function(z) {
@@ -65,7 +65,7 @@ LSIR <- function(X, y,
     # Compute central subspace basis estimate
     betas <- mapply(function(isqrt_sigma, omega, reduction_dim) {
         isqrt_sigma %*% La.svd(omega, reduction_dim, 0L)$u
-    }, isqrt_sigmas, omegas, reduction.dims, SIMPLIFY = FALSE)
+    }, isqrt_Sigmas, Omegas, reduction.dims, SIMPLIFY = FALSE)
 
-    list(betas = betas)
+    list(betas = betas, Sigmas = Sigmas, Omegas = Omegas)
 }
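For reference, the richer LSIR return value makes it possible to judge whether
covariance regularization is warranted before rerunning the LOO experiments. A
sketch, assuming the `LSIR(X, y, sample.axis = 3L)` call signature used in
02_eeg_2d.R:

    fit <- LSIR(X, y, sample.axis = 3L)
    # condition numbers of the mode-wise covariance estimates; values far
    # above the threshold used for TSIR (25) hint that regularization helps
    sapply(fit$Sigmas, kappa)
    # the standardized `betas` component plugs into `mlm` as everywhere else
    r <- as.vector(mlm(X, fit$betas, modes = 1:2, transpose = TRUE))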
diff --git a/tensorPredictors/R/TSIR.R b/tensorPredictors/R/TSIR.R
index 2ce7dee..1c364d1 100644
--- a/tensorPredictors/R/TSIR.R
+++ b/tensorPredictors/R/TSIR.R
@@ -135,5 +135,5 @@ TSIR <- function(X, y,
 
-    # reductions matrices `Omega_k^-1 Gamma_k` where there (reverse) kronecker
-    # product spans the central tensor subspace (CTS) estimate
-    structure(Map(solve, Omegas, Gammas), mcov = Omegas, Gammas = Gammas)
+    # reduction matrices `Omega_k^-1 Gamma_k` whose (reverse) Kronecker
+    # product spans the central tensor subspace (CTS) estimate
+    list(betas = Map(solve, Omegas, Gammas), Omegas = Omegas, Gammas = Gammas)
 }
diff --git a/tensorPredictors/R/rtensornorm.R b/tensorPredictors/R/rtensornorm.R
index 46b2356..1033306 100644
--- a/tensorPredictors/R/rtensornorm.R
+++ b/tensorPredictors/R/rtensornorm.R
@@ -31,7 +31,9 @@ rtensornorm <- function(n, mean, cov, sample.axis = 1L) {
     }
 
     # add mean (using recycling, observations on last mode)
-    X <- X + c(mean)
+    if (!missing(mean)) {
+        X <- X + c(mean)
+    }
 
     # permute axis for indexing observations on sample mode (permute first axis
     # with sample mode axis)
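Usage note: with the `missing(mean)` guard, `rtensornorm` now supports zero-mean
draws with the mean argument omitted entirely. A sketch, assuming `cov` is the
list of mode-wise covariance matrices implied by the function body:

    library(tensorPredictors)
    # 10 zero-mean observations of a 3 x 4 tensor normal, observations on axis 3
    X <- rtensornorm(10, cov = list(diag(3), diag(4)), sample.axis = 3L)
    dim(X)  # expected: 3 4 10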