parent
b94b48091e
commit
62f1656d96
|
@ -0,0 +1,202 @@
|
|||
library(tensorPredictors)
|
||||
suppressPackageStartupMessages({
|
||||
library(ggplot2)
|
||||
})
|
||||
|
||||
################################################################################
|
||||
### Loading EEG Data ###
|
||||
################################################################################
|
||||
|
||||
# Load as 3D predictors `X` and flat response `y`
|
||||
c(X, y) %<-% local({
|
||||
# Load from file
|
||||
ds <- readRDS("eeg_data.rds")
|
||||
|
||||
# Dimension values
|
||||
n <- nrow(ds) # sample size (nr. of people)
|
||||
p <- 64L # nr. of predictors (count of sensorce)
|
||||
t <- 256L # nr. of time points (measurements)
|
||||
|
||||
# Extract dimension names
|
||||
nNames <- ds$PersonID
|
||||
tNames <- as.character(seq(t))
|
||||
pNames <- unlist(strsplit(colnames(ds)[2 + t * seq(p)], "_"))[c(TRUE, FALSE)]
|
||||
|
||||
# Split into predictors (with proper dims and names) and response
|
||||
X <- array(as.matrix(ds[, -(1:2)]),
|
||||
dim = c(person = n, time = t, sensor = p),
|
||||
dimnames = list(person = nNames, time = tNames, sensor = pNames)
|
||||
)
|
||||
y <- ds$Case_Control
|
||||
|
||||
list(X, y)
|
||||
})
|
||||
|
||||
################################################################################
|
||||
### LOO-CV for Multiple Methods ###
|
||||
################################################################################
|
||||
|
||||
# compatibility wrapper for function implemented with the "old" API
|
||||
toNewAPI <- function(func) {
|
||||
function(...) {
|
||||
res <- func(...)
|
||||
list(alphas = list(res$beta, res$alpha))
|
||||
}
|
||||
}
|
||||
|
||||
# Number of (2D)^2 PCA components per axis
|
||||
npcs <- list(c(3, 4), c(15, 15), c(20, 30), dim(X)[-1])
|
||||
|
||||
# setup methods for simulation (with unified API)
|
||||
methods <- list(
|
||||
kpir.base = list(
|
||||
fun = toNewAPI(kpir.base),
|
||||
is.applicable = function(npc) prod(npc) < 100
|
||||
),
|
||||
kpir.new.vlp = list(
|
||||
fun = toNewAPI(function(X, Fy) kpir.new(X, Fy, init.method = "vlp")),
|
||||
is.applicable = function(npc) prod(npc) < 100
|
||||
),
|
||||
kpir.new.ls = list(
|
||||
fun = toNewAPI(function(X, Fy) kpir.new(X, Fy, init.method = "ls")),
|
||||
is.applicable = function(npc) prod(npc) < 100
|
||||
),
|
||||
kpir.ls = list(
|
||||
fun = kpir.ls,
|
||||
is.applicable = function(npc) TRUE
|
||||
),
|
||||
kpir.momentum.vlp = list(
|
||||
fun = toNewAPI(function(X, Fy) kpir.momentum(X, Fy, init.method = "vlp")),
|
||||
is.applicable = function(npc) prod(npc) < 100
|
||||
),
|
||||
kpir.momentum.ls = list(
|
||||
fun = toNewAPI(function(X, Fy) kpir.momentum(X, Fy, init.method = "ls")),
|
||||
is.applicable = function(npc) prod(npc) < 100
|
||||
),
|
||||
kpir.approx.vlp = list(
|
||||
fun = toNewAPI(function(X, Fy) kpir.approx(X, Fy, init.method = "vlp")),
|
||||
is.applicable = function(npc) prod(npc) < 100
|
||||
),
|
||||
kpir.approx.ls = list(
|
||||
fun = toNewAPI(function(X, Fy) kpir.approx(X, Fy, init.method = "ls")),
|
||||
is.applicable = function(npc) TRUE
|
||||
)
|
||||
)
|
||||
|
||||
# define AUC for reporting while simulation is running
|
||||
auc <- function(y_true, y_pred) pROC::roc(y_true, y_pred, quiet = TRUE)$auc[1]
|
||||
|
||||
# init complete simulation as empty
|
||||
sim <- NULL
|
||||
for (npc in npcs) {
|
||||
# check if any PC count is smaller than the axis
|
||||
if (any(npc < dim(X)[-1])) {
|
||||
# Reduce dimensions using (2D)^2 PCA, which is a special case of the Higher
|
||||
# Order Principal Component Analysis
|
||||
pcs <- hopca(X, npc = npc, sample.axis = 1)
|
||||
# Reduce dimensions
|
||||
X.pc <- mlm(X, Map(t, pcs), modes = 2:3)
|
||||
} else {
|
||||
# No reduction
|
||||
X.pc <- X
|
||||
}
|
||||
|
||||
for (name in names(methods)) {
|
||||
# check if method can be applied to current reduction dimensions
|
||||
if (!methods[[name]]$is.applicable(npc)) {
|
||||
next
|
||||
}
|
||||
|
||||
# extract method to be applied
|
||||
method <- methods[[name]]$fun
|
||||
|
||||
# report name of current simulation method
|
||||
cat(sprintf("npc: (t = %d, p = %d), method: %s\n", npc[1], npc[2], name))
|
||||
|
||||
# Leave-One-Out Cross-Validation
|
||||
loo.cv <- data.frame(
|
||||
y_true = y, y_pred = NA, # CV responses
|
||||
elapsed = NA, sys.self = NA, user.self = NA # execution time
|
||||
)
|
||||
for (i in seq_len(nrow(X.pc))) {
|
||||
# report progress
|
||||
cat(sprintf("\r%3d/%d", i, nrow(X.pc)))
|
||||
|
||||
# Split into training/test data
|
||||
X.train <- X.pc[-i, , ]
|
||||
y.train <- scale(y[-i], scale = FALSE)
|
||||
X.test <- X.pc[i, , , drop = FALSE]
|
||||
y.test <- scale(y[i], center = attr(y.train, "scaled:center"), scale = FALSE)
|
||||
|
||||
# fit reduction (with method one of the methods to be "validated")
|
||||
time <- system.time(sdr <- method(X.train, c(y.train)))
|
||||
|
||||
# reduce training data and fit a GLM
|
||||
x.train <- mlm(X.train, Map(t, sdr$alphas), modes = 2:3)
|
||||
fit <- glm(y ~ x, family = binomial(link = "logit"),
|
||||
data = data.frame(y = y[-i], x = matrix(x.train, nrow(x.train))))
|
||||
|
||||
# predict from reduced test data
|
||||
x.test <- mlm(X.test, Map(t, sdr$alphas), modes = 2:3)
|
||||
y.pred <- predict(fit, data.frame(x = matrix(x.test, 1)), type = "response")
|
||||
|
||||
loo.cv[i, "y_pred"] <- y.pred
|
||||
loo.cv[i, "elapsed"] <- time["elapsed"]
|
||||
loo.cv[i, "sys.self"] <- time["sys.self"]
|
||||
loo.cv[i, "user.self"] <- time["user.self"]
|
||||
}
|
||||
|
||||
# accumulate LOO-CV results to previous results
|
||||
loo.cv$method <- factor(name)
|
||||
loo.cv$npc <- factor(sprintf("(%d, %d)", npc[1], npc[2]))
|
||||
sim <- rbind(sim, loo.cv)
|
||||
|
||||
# Report partial sim done and one of the interesting measures
|
||||
cat(sprintf(" (Done) AUC: %f\n", with(loo.cv, auc(y_true, y_pred))))
|
||||
|
||||
# dump simulation (after each fold) to file
|
||||
saveRDS(sim, "eeg_sim.rds")
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
################################################################################
|
||||
### Simulation Stats ###
|
||||
################################################################################
|
||||
# sim <- readRDS("eeg_sim.rds")
|
||||
|
||||
metrics <- list(
|
||||
# acc: Accuracy. P(Yhat = Y). Estimated as: (TP+TN)/(P+N).
|
||||
"Acc" = function(y_true, y_pred) mean(round(y_pred) == y_true),
|
||||
# err: Error rate. P(Yhat != Y). Estimated as: (FP+FN)/(P+N).
|
||||
"Err" = function(y_true, y_pred) mean(round(y_pred) != y_true),
|
||||
# fpr: False positive rate. P(Yhat = + | Y = -). aliases: Fallout.
|
||||
"FPR" = function(y_true, y_pred) mean((round(y_pred) == 1)[y_true == 0]),
|
||||
# tpr: True positive rate. P(Yhat = + | Y = +). aliases: Sensitivity, Recall.
|
||||
"TPR" = function(y_true, y_pred) mean((round(y_pred) == 1)[y_true == 1]),
|
||||
# fnr: False negative rate. P(Yhat = - | Y = +). aliases: Miss.
|
||||
"FNR" = function(y_true, y_pred) mean((round(y_pred) == 0)[y_true == 1]),
|
||||
# tnr: True negative rate. P(Yhat = - | Y = -).
|
||||
"TNR" = function(y_true, y_pred) mean((round(y_pred) == 0)[y_true == 0]),
|
||||
# auc: Area Under the Curve
|
||||
"AUC" = function(y_true, y_pred) pROC::roc(y_true, y_pred, quiet = TRUE)$auc[1],
|
||||
# auc.sd: Estimated standard error of the AUC estimate
|
||||
"sd(AUC)" = function(y_true, y_pred)
|
||||
sqrt(pROC::var(pROC::roc(y_true, y_pred, quiet = TRUE)))
|
||||
)
|
||||
|
||||
# Applies metrics on a group
|
||||
do.stats <- function(group) {
|
||||
stat <- Map(do.call, metrics, list(as.list(group[c("y_true", "y_pred")])))
|
||||
data.frame(method = group$method[1], npc = group$npc[1], stat, check.names = FALSE)
|
||||
}
|
||||
# Call stats for each grouping
|
||||
stats <- do.call(rbind, Map(do.stats, split(sim, ~ method + npc, sep = " ")))
|
||||
rownames(stats) <- NULL
|
||||
|
||||
print(stats, digits = 2)
|
||||
|
||||
# and execution time stats
|
||||
times <- aggregate(cbind(elapsed, sys.self, user.self) ~ method + npc, sim, median)
|
||||
|
||||
print(times, digits = 2)
|
|
@ -70,7 +70,7 @@ sim <- function(X, Fy, alpha.true, beta.true, max.iter = 500L) {
|
|||
logger = logger("new.ls"))
|
||||
|
||||
# Least Squares estimate (alternating estimation)
|
||||
kpir.ls(X, Fy, sample.mode = 1L, max.iter = max.iter, logger = logger("ls"))
|
||||
kpir.ls(X, Fy, sample.axis = 1L, max.iter = max.iter, logger = logger("ls"))
|
||||
|
||||
# Gradient Descent with Nesterov Momentum
|
||||
kpir.momentum(X, Fy, max.iter = max.iter, init.method = "vlp",
|
||||
|
@ -237,7 +237,7 @@ for (rep in 1:reps) {
|
|||
))
|
||||
dim(Fy) <- c(n, k, r)
|
||||
X <- mlm(Fy, alpha.1, alpha.2, modes = 3:2)
|
||||
X <- X + rtensornorm(n, 0, Delta.1, Delta.2, sample.mode = 1L)
|
||||
X <- X + rtensornorm(n, 0, Delta.1, Delta.2, sample.axis = 1L)
|
||||
|
||||
hist.sim <- sim(X, Fy, alpha.1.true, alpha.2.true, max.iter = max.iter)
|
||||
hist.sim$repetition <- rep
|
||||
|
@ -331,7 +331,7 @@ sim3 <- function(X, Fy, alphas.true, max.iter = 500L) {
|
|||
))
|
||||
|
||||
# Approximated MLE with Nesterov Momentum
|
||||
kpir.ls(X, Fy, sample.mode = 1L, max.iter = max.iter, logger = logger("ls"))
|
||||
kpir.ls(X, Fy, sample.axis = 1L, max.iter = max.iter, logger = logger("ls"))
|
||||
|
||||
# Add method tags
|
||||
hist.ls$method <- factor("ls")
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
# Generated by roxygen2: do not edit by hand
|
||||
|
||||
export("%<-%")
|
||||
export("%x_1%")
|
||||
export("%x_2%")
|
||||
export("%x_3%")
|
||||
|
@ -13,6 +14,7 @@ export(approx.kronecker)
|
|||
export(colKronecker)
|
||||
export(dist.projection)
|
||||
export(dist.subspace)
|
||||
export(hopca)
|
||||
export(kpir.approx)
|
||||
export(kpir.base)
|
||||
export(kpir.ls)
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
#' Higher Order Principal Component Analysis
|
||||
#'
|
||||
#' @param X multi-dimensional array (at least a matrix)
|
||||
#' @param npc Number of Principal Components for each axis, if not specified
|
||||
#' its the maximum
|
||||
#' @param sample.axis index of the sample mode, a.k.a. observation axis index
|
||||
#'
|
||||
#' @return list of matrices, each entry are the first PCs of the corresponding
|
||||
#' axis. The `i`'th entry are the `npc[i]` first Principal Components of the
|
||||
#' `i`th axis excluding the sample axis (note: this means there is an index
|
||||
#' shift after the sample axis).
|
||||
#'
|
||||
#' @export
|
||||
hopca <- function(X, npc = dim(X)[-sample.axis], sample.axis = 1L) {
|
||||
# observation index numbers (all axis except the sample axis)
|
||||
modes <- seq_along(dim(X))[-sample.axis]
|
||||
|
||||
# Mean (a.k.a. sum elements over the sample axis)
|
||||
mu <- apply(X, modes, mean, simplify = TRUE)
|
||||
# Center `X` by subtraction of the mean `mu` from each observation
|
||||
X.centered <- sweep(X, modes, mu)
|
||||
|
||||
# PCA for each mode (axis)
|
||||
PCs <- Map(function(i) {
|
||||
La.svd(mcrossprod(X.centered, modes[i]), npc[i], 0)$u
|
||||
}, seq_along(modes))
|
||||
|
||||
# Set names if any
|
||||
if (!is.null(dimnames(X))) {
|
||||
names(PCs) <- names(dimnames(X)[-sample.axis])
|
||||
for (i in seq_along(modes)) {
|
||||
dimnames(PCs[[i]]) <- list(dimnames(X)[-sample.axis][[i]], NULL)
|
||||
}
|
||||
}
|
||||
|
||||
PCs
|
||||
}
|
|
@ -57,7 +57,7 @@ kpir.approx <- function(X, Fy, shape = c(dim(X)[-1], dim(Fy[-1])),
|
|||
dim(Fy) <- c(n, k, r)
|
||||
dim(X) <- c(n, p, q)
|
||||
|
||||
ls <- kpir.ls(X, Fy, max.iter = max.init.iter, sample.mode = 1L, eps = eps)
|
||||
ls <- kpir.ls(X, Fy, max.iter = max.init.iter, sample.axis = 1L, eps = eps)
|
||||
c(beta0, alpha0) %<-% ls$alphas
|
||||
} else { # Van Loan and Pitsianis
|
||||
# Vectorize
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
#' Per mode (axis) alternating least squares estimate
|
||||
#'
|
||||
#' @param sample.mode index of the sample mode, a.k.a. observation axis index
|
||||
#' @param sample.axis index of the sample mode, a.k.a. observation axis index
|
||||
#'
|
||||
#' @export
|
||||
kpir.ls <- function(X, Fy, max.iter = 20L, sample.mode = 1L,
|
||||
kpir.ls <- function(X, Fy, max.iter = 20L, sample.axis = 1L,
|
||||
eps = .Machine$double.eps, logger = NULL
|
||||
) {
|
||||
# Check if X and Fy have same number of observations
|
||||
|
@ -11,20 +11,20 @@ kpir.ls <- function(X, Fy, max.iter = 20L, sample.mode = 1L,
|
|||
# scalar response case (add new axis of size 1)
|
||||
dim(Fy) <- local({
|
||||
dims <- rep(1, length(dim(X)))
|
||||
dims[sample.mode] <- length(Fy)
|
||||
dims[sample.axis] <- length(Fy)
|
||||
dims
|
||||
})
|
||||
} else {
|
||||
stopifnot(dim(X)[sample.mode] == dim(Fy)[sample.mode])
|
||||
stopifnot(dim(X)[sample.axis] == dim(Fy)[sample.axis])
|
||||
}
|
||||
# Check dimensions
|
||||
stopifnot(length(dim(X)) == length(dim(Fy)))
|
||||
stopifnot(dim(X)[sample.mode] == dim(Fy)[sample.mode])
|
||||
stopifnot(dim(X)[sample.axis] == dim(Fy)[sample.axis])
|
||||
# and model constraints
|
||||
stopifnot(all(dim(Fy) <= dim(X)))
|
||||
|
||||
# mode index sequence (exclude sample mode, a.k.a. observation axis)
|
||||
modes <- seq_along(dim(X))[-sample.mode]
|
||||
modes <- seq_along(dim(X))[-sample.axis]
|
||||
|
||||
|
||||
### Step 1: initial per mode estimates
|
||||
|
@ -68,7 +68,7 @@ kpir.ls <- function(X, Fy, max.iter = 20L, sample.mode = 1L,
|
|||
R <- X - mlm(Fy, alphas, modes = modes)
|
||||
# Moment estimates for `Delta_i`s
|
||||
Deltas <- Map(mcrossprod, list(R), mode = modes)
|
||||
Deltas <- Map(`*`, 1 / dim(X)[sample.mode], Deltas)
|
||||
Deltas <- Map(`*`, 1 / dim(X)[sample.axis], Deltas)
|
||||
|
||||
list(
|
||||
alphas = structure(alphas, names = as.character(modes)),
|
||||
|
|
|
@ -54,7 +54,7 @@ kpir.momentum <- function(X, Fy, shape = c(dim(X)[-1], dim(Fy[-1])),
|
|||
if (init.method == "ls") {
|
||||
dim(X) <- c(n, p, q)
|
||||
dim(Fy) <- c(n, k, r)
|
||||
ls <- kpir.ls(X, Fy, max.iter = max.init.iter, sample.mode = 1L, eps = eps)
|
||||
ls <- kpir.ls(X, Fy, max.iter = max.init.iter, sample.axis = 1L, eps = eps)
|
||||
c(beta0, alpha0) %<-% ls$alphas
|
||||
dim(X) <- c(n, p * q)
|
||||
dim(Fy) <- c(n, k * r)
|
||||
|
|
|
@ -51,7 +51,7 @@ kpir.new <- function(X, Fy, shape = c(dim(X)[-1], dim(Fy[-1])),
|
|||
if (init.method == "ls") {
|
||||
dim(X) <- c(n, p, q)
|
||||
dim(Fy) <- c(n, k, r)
|
||||
ls <- kpir.ls(X, Fy, max.iter = max.init.iter, sample.mode = 1L, eps = eps)
|
||||
ls <- kpir.ls(X, Fy, max.iter = max.init.iter, sample.axis = 1L, eps = eps)
|
||||
c(beta, alpha) %<-% ls$alphas
|
||||
dim(X) <- c(n, p * q)
|
||||
dim(Fy) <- c(n, k * r)
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
#' # extracting the first three valus from the vector of length 10.
|
||||
#' }
|
||||
#'
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
"%<-%" <- function(lhs, rhs) {
|
||||
var.names <- make.names(as.list(substitute(lhs))[-1])
|
||||
values <- as.list(rhs)
|
||||
|
@ -31,5 +31,5 @@
|
|||
for (i in seq_along(var.names)) {
|
||||
assign(var.names[i], values[[i]], envir = env)
|
||||
}
|
||||
lhs
|
||||
invisible(lhs)
|
||||
}
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
#' X <- rtensornorm(n, 0, Sigma.1, Sigma.2)
|
||||
#'
|
||||
#' @export
|
||||
rtensornorm <- function(n, mean, ..., sample.mode) {
|
||||
rtensornorm <- function(n, mean, ..., sample.axis) {
|
||||
# get covariance matrices
|
||||
cov <- list(...)
|
||||
|
||||
|
@ -38,10 +38,10 @@ rtensornorm <- function(n, mean, ..., sample.mode) {
|
|||
|
||||
# permute axis for indeing observations on sample mode (permute first axis
|
||||
# with sample mode axis)
|
||||
if (!missing(sample.mode)) {
|
||||
if (!missing(sample.axis)) {
|
||||
axis <- seq_len(length(dims) - 1)
|
||||
start <- seq_len(sample.mode - 1)
|
||||
end <- seq_len(length(dims) - sample.mode) + sample.mode - 1
|
||||
start <- seq_len(sample.axis - 1)
|
||||
end <- seq_len(length(dims) - sample.axis) + sample.axis - 1
|
||||
X <- aperm(X, c(axis[start], length(dims), axis[end]))
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue